diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..76d0075938bab7774619ea0d772ab821a57c9aad 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+obsei_module/images/obsei_flow.gif filter=lfs diff=lfs merge=lfs -text
+obsei_module/obsei-master/images/obsei_flow.gif filter=lfs diff=lfs merge=lfs -text
diff --git a/obsei_module/.github/ISSUE_TEMPLATE/bug_report.md b/obsei_module/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000000000000000000000000000000000..c6915c4ae905cb402e1dc710b3daafb8f6360df4
--- /dev/null
+++ b/obsei_module/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,27 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: "[BUG]"
+labels: bug
+assignees: lalitpagaria
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stacktrace**
+If applicable, add stacktrace to help explain your problem.
+
+**Please complete the following information:**
+ - OS:
+ - Version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/obsei_module/.github/ISSUE_TEMPLATE/feature_request.md b/obsei_module/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000000000000000000000000000000000..11fc491ef1dae316f2b06bbb40eaba9c757fdfd1
--- /dev/null
+++ b/obsei_module/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/obsei_module/.github/dependabot.yml b/obsei_module/.github/dependabot.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2c7d1708395e202b3b3316391f35bf4183ebd045
--- /dev/null
+++ b/obsei_module/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+ # Maintain dependencies for GitHub Actions
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "daily"
diff --git a/obsei_module/.github/release-drafter.yml b/obsei_module/.github/release-drafter.yml
new file mode 100644
index 0000000000000000000000000000000000000000..794187190e6f3fb290174970df09c18306b58a39
--- /dev/null
+++ b/obsei_module/.github/release-drafter.yml
@@ -0,0 +1,33 @@
+name-template: 'v$RESOLVED_VERSION 🌈'
+tag-template: 'v$RESOLVED_VERSION'
+categories:
+ - title: '🚀 Features'
+ labels:
+ - 'feature'
+ - 'enhancement'
+ - title: '🐛 Bug Fixes'
+ labels:
+ - 'fix'
+ - 'bugfix'
+ - 'bug'
+ - title: '🧰 Maintenance'
+ label: 'chore'
+ - title: '⚠️Breaking Changes'
+ label: 'breaking changes'
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+version-resolver:
+ major:
+ labels:
+ - 'major'
+ minor:
+ labels:
+ - 'minor'
+ patch:
+ labels:
+ - 'patch'
+ default: patch
+template: |
+ ## Changes
+
+ $CHANGES
\ No newline at end of file
diff --git a/obsei_module/.github/workflows/build.yml b/obsei_module/.github/workflows/build.yml
new file mode 100644
index 0000000000000000000000000000000000000000..767b04e369bceb740995187c9c3dfda5e3a90325
--- /dev/null
+++ b/obsei_module/.github/workflows/build.yml
@@ -0,0 +1,54 @@
+# This workflow will install Python dependencies, run test and lint with a single version of Python
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI
+
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+jobs:
+ type-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+ - name: Test with mypy
+ run: |
+ pip install mypy
+ # Refer http://mypy-lang.blogspot.com/2021/06/mypy-0900-released.html
+ pip install mypy types-requests types-python-dateutil types-PyYAML types-dateparser types-protobuf types-pytz
+ mypy obsei
+
+ build-and-test:
+ needs: type-check
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ ubuntu-latest, macos-latest, windows-latest ]
+ python-version: ['3.8', '3.9', '3.10', '3.11']
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install '.[dev,all]'
+ pip install --upgrade --upgrade-strategy eager trafilatura
+ python -m spacy download en_core_web_lg
+ python -m spacy download en_core_web_sm
+
+ - name: Test with pytest
+ run: |
+ coverage run -m pytest
+ coverage report -m
diff --git a/obsei_module/.github/workflows/pypi_publish.yml b/obsei_module/.github/workflows/pypi_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..316334bb75c8e3fff0974b52fa85ffadcbb0b289
--- /dev/null
+++ b/obsei_module/.github/workflows/pypi_publish.yml
@@ -0,0 +1,35 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+ workflow_dispatch:
+ release:
+ types: [published]
+
+jobs:
+ deploy-pypi-artifact:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.8'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine hatch
+
+ - name: publish to PyPI
+ if: github.event_name != 'pull_request'
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ run: |
+ hatch build
+ twine upload dist/*
diff --git a/obsei_module/.github/workflows/release_draft.yml b/obsei_module/.github/workflows/release_draft.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2ed3737754610ea9c71896646975b34355580b4e
--- /dev/null
+++ b/obsei_module/.github/workflows/release_draft.yml
@@ -0,0 +1,15 @@
+name: release draft
+
+on:
+ workflow_dispatch:
+
+jobs:
+ draft-release:
+# if: startsWith(github.ref, 'refs/tags/')
+ runs-on: ubuntu-latest
+ steps:
+ - uses: release-drafter/release-drafter@v6
+ with:
+ config-name: release-drafter.yml
+ env:
+ GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFT_TOKEN }}
\ No newline at end of file
diff --git a/obsei_module/.github/workflows/sdk_docker_publish.yml b/obsei_module/.github/workflows/sdk_docker_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..70daa5e698326d30b5d1cee9ba8a5e9213bda1b7
--- /dev/null
+++ b/obsei_module/.github/workflows/sdk_docker_publish.yml
@@ -0,0 +1,50 @@
+# This workflow will build and publish the Obsei SDK Docker image when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Publish SDK docker image
+
+on:
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: 'Image tag'
+ required: true
+ release:
+ types: [published]
+
+jobs:
+ deploy-sdk-docker:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Docker meta
+ id: docker_meta
+ uses: docker/metadata-action@v5
+ with:
+ images: obsei/obsei-sdk
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to DockerHub
+ if: github.event_name != 'pull_request'
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Build and push
+ uses: docker/build-push-action@v5
+ with:
+ context: ./
+ file: ./Dockerfile
+ push: ${{ github.event_name != 'pull_request' }}
+ tags: ${{ steps.docker_meta.outputs.tags }}
+ labels: ${{ steps.docker_meta.outputs.labels }}
+
+ - name: Image digest
+ run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/obsei_module/.github/workflows/ui_docker_publish.yml b/obsei_module/.github/workflows/ui_docker_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2fc690055b8d20c9462412352f2d75f8a6710447
--- /dev/null
+++ b/obsei_module/.github/workflows/ui_docker_publish.yml
@@ -0,0 +1,50 @@
+# This workflow will build and publish the Obsei UI demo Docker image when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Publish UI Docker image
+
+on:
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: 'Image tag'
+ required: true
+ release:
+ types: [published]
+
+jobs:
+ deploy-ui-docker:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Docker meta
+ id: docker_meta
+ uses: docker/metadata-action@v5
+ with:
+ images: obsei/obsei-ui-demo
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to DockerHub
+ if: github.event_name != 'pull_request'
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Build and push
+ uses: docker/build-push-action@v5
+ with:
+ context: "{{defaultContext}}:sample-ui"
+ file: Dockerfile
+ push: ${{ github.event_name != 'pull_request' }}
+ tags: ${{ steps.docker_meta.outputs.tags }}
+ labels: ${{ steps.docker_meta.outputs.labels }}
+
+ - name: Image digest
+ run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/obsei_module/.gitignore b/obsei_module/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..80dd90d9cb4c179a40e922c4a9482c3afe64a999
--- /dev/null
+++ b/obsei_module/.gitignore
@@ -0,0 +1,148 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+/.idea/*
+*.db
+models*
+
+# OSX custom attributes
+.DS_Store
+
+# VS code configuration
+.vscode/*
diff --git a/obsei_module/.pre-commit-config.yaml b/obsei_module/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7631ed863745fe51f97e33d1b98b0aeb5ef43b70
--- /dev/null
+++ b/obsei_module/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.3.0
+ hooks:
+ - id: check-yaml
+ - id: trailing-whitespace
+ - id: requirements-txt-fixer
+ - id: end-of-file-fixer
+
+ - repo: https://github.com/psf/black
+ rev: 22.10.0
+ hooks:
+ - id: black
+
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v0.991
+ hooks:
+ - id: mypy
+ args: [--ignore-missing-imports]
+ additional_dependencies: [types-all]
+ files: ^obsei/
diff --git a/obsei_module/.pyup.yml b/obsei_module/.pyup.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b55ad548d5705a6c15d6f79192892e7612dbc2a3
--- /dev/null
+++ b/obsei_module/.pyup.yml
@@ -0,0 +1,5 @@
+# autogenerated pyup.io config file
+# see https://pyup.io/docs/configuration/ for all available options
+
+schedule: ''
+update: insecure
diff --git a/obsei_module/ATTRIBUTION.md b/obsei_module/ATTRIBUTION.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc6f436d7be74b3ca7d9bbcdcd7d823fb52f7a2e
--- /dev/null
+++ b/obsei_module/ATTRIBUTION.md
@@ -0,0 +1,18 @@
+This could not have been possible without following open source software -
+- [searchtweets-v2](https://github.com/twitterdev/search-tweets-python): For Twitter's API v2 wrapper
+- [vaderSentiment](https://github.com/cjhutto/vaderSentiment): For rule-based sentiment analysis
+- [transformers](https://github.com/huggingface/transformers): For text-classification pipeline
+- [atlassian-python-api](https://github.com/atlassian-api/atlassian-python-api): To interact with Jira
+- [elasticsearch](https://github.com/elastic/elasticsearch-py): To interact with Elasticsearch
+- [pydantic](https://github.com/samuelcolvin/pydantic): For data validation
+- [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy): As SQL toolkit to access DB storage
+- [google-play-scraper](https://github.com/JoMingyu/google-play-scraper): To fetch the Google Play Store review without authentication
+- [praw](https://github.com/praw-dev/praw): For Reddit client
+- [reddit-rss-reader](https://github.com/lalitpagaria/reddit-rss-reader): For Reddit scraping
+- [app-store-reviews-reader](https://github.com/lalitpagaria/app_store_reviews_reader): For App Store reviews scraping
+- [slack-sdk](https://github.com/slackapi/python-slack-sdk): For slack integration
+- [presidio-anonymizer](https://github.com/microsoft/presidio): Personal information anonymizer
+- [GoogleNews](https://github.com/Iceloof/GoogleNews): For Google News integration
+- [python-facebook-api](https://github.com/sns-sdks/python-facebook): For facebook integration
+- [youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader): For Youtube video comments extraction code
+- [dateparser](https://github.com/scrapinghub/dateparser): To parse date properly (where format is ambiguous)
\ No newline at end of file
diff --git a/obsei_module/CITATION.cff b/obsei_module/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..bd12a46b8dc05be975f138e2357ebef65de9ade3
--- /dev/null
+++ b/obsei_module/CITATION.cff
@@ -0,0 +1,14 @@
+# YAML 1.2
+---
+authors:
+ -
+ family-names: Pagaria
+ given-names: Lalit
+
+cff-version: "1.1.0"
+license: "Apache-2.0"
+message: "If you use this software, please cite it using this metadata."
+repository-code: "https://github.com/obsei/obsei"
+title: "Obsei - a low code AI powered automation tool"
+version: "0.0.10"
+...
diff --git a/obsei_module/CNAME b/obsei_module/CNAME
new file mode 100644
index 0000000000000000000000000000000000000000..48c4fb7ad825704db946a83e64693071ebe454d7
--- /dev/null
+++ b/obsei_module/CNAME
@@ -0,0 +1 @@
+www.obsei.com
\ No newline at end of file
diff --git a/obsei_module/CODE_OF_CONDUCT.md b/obsei_module/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..e8c5ad02324a0fa0778f625fd77f183f3c531ff7
--- /dev/null
+++ b/obsei_module/CODE_OF_CONDUCT.md
@@ -0,0 +1,128 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the
+ overall community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or
+ advances of any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email
+ address, without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+obsei.tool@gmail.com
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or
+permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
diff --git a/obsei_module/CONTRIBUTING.md b/obsei_module/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..f0afbb0a9ac896f8afb37430e389450efe498926
--- /dev/null
+++ b/obsei_module/CONTRIBUTING.md
@@ -0,0 +1,103 @@
+# 👐 Contributing to Obsei
+
+First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
+Community contributions are what keep projects like this fueled and constantly improving, so a big thanks to you!
+
+Below are some sections detailing the guidelines we'd like you to follow to make your contribution as seamless as possible.
+
+- [Code of Conduct](#coc)
+- [Asking a Question and Discussions](#question)
+- [Issues, Bugs, and Feature Requests](#issue)
+- [Submission Guidelines](#submit)
+- [Code Style and Formatting](#code)
+- [Contributor License Agreement](#cla)
+
+## 📜 Code of Conduct
+
+The [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md) applies within all community spaces.
+If you are not familiar with our Code of Conduct policy, take a minute to read the policy before starting with your first contribution.
+
+## 🗣️ Query or Discussion
+
+We would like to use [Github discussions](https://github.com/obsei/obsei/discussions) as the central hub for all
+community discussions, questions, and everything else in between. While Github discussions is a new service (as of 2021)
+we believe that it really helps keep this repo as one single source to find all relevant information. Our hope is that
+discussion page functions as a record of all the conversations that help contribute to the project's development.
+
+If you are new to [Github discussions](https://github.com/obsei/obsei/discussions) it is a very similar experience
+to Stack Overflow with an added element of general discussion and discourse rather than solely being question and answer based.
+
+## 🪲 Issues, Bugs, and Feature Requests
+
+We are very open to community contributions and appreciate anything that improves **Obsei**. This includes fixing typos, adding missing documentation, fixing bugs or adding new features.
+To avoid unnecessary work on either side, please stick to the following process:
+
+1. If you feel like your issue is not specific and more of a general question about a design decision, or algorithm implementation maybe start a [discussion](https://github.com/obsei/obsei/discussions) instead, this helps keep the issues less cluttered and encourages more open-ended conversation.
+2. Check if there is already [a related issue](https://github.com/obsei/obsei/issues).
+3. If there is not, open a new one to start a discussion. Some features might be a nice idea, but don't fit in the scope of Obsei and we hate to close finished PRs.
+4. If we came to the conclusion to move forward with your issue, we will be happy to accept a pull request. Make sure you create a pull request in an early draft version and ask for feedback.
+5. Verify that all tests in the CI pass (and add new ones if you implement anything new)
+
+See [below](#submit) for some guidelines.
+
+## ✉️ Submission Guidelines
+
+### Submitting an Issue
+
+Before you submit your issue search the archive, maybe your question was already answered.
+
+If your issue appears to be a bug, and hasn't been reported, open a new issue.
+Help us to maximize the effort we can spend fixing issues and adding new
+features, by not reporting duplicate issues. Providing the following information will increase the
+chances of your issue being dealt with quickly:
+
+- **Describe the bug** - A clear and concise description of what the bug is.
+- **To Reproduce** - Steps to reproduce the behavior.
+- **Expected behavior** - A clear and concise description of what you expected to happen.
+- **Environment**
+ - Obsei version
+ - Python version
+ - OS
+- **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
+ causing the problem (line of code or commit)
+
+When you submit a PR you will be presented with a PR template, please fill this in as best you can.
+
+### Submitting a Pull Request
+
+Before you submit your pull request consider the following guidelines:
+
+- Search [GitHub](https://github.com/obsei/obsei/pulls) for an open or closed Pull Request
+ that relates to your submission. You don't want to duplicate effort.
+- Fork the main repo if not already done
+- Rebase fork with `upstream master`
+- Create new branch and add the changes in that branch
+- Add supporting test cases
+- Follow our [Coding Rules](#rules).
+- Avoid checking in files that shouldn't be tracked (e.g `dist`, `build`, `.tmp`, `.idea`).
+ We recommend using a [global](#global-gitignore) gitignore for this.
+- Before you commit please run the test suite and make sure all tests are passing.
+- Format your code appropriately:
+ - This package uses [black](https://black.readthedocs.io/en/stable/) as its formatter.
+ In order to format your code with black run `black . ` from the root of the package.
+- Run `pre-commit run --all-files` if you're adding new hooks to pre-commit config file. By default, pre-commit will run on modified files when committing changes.
+- Commit your changes using a descriptive commit message.
+- In GitHub, send a pull request to `obsei:master`.
+- If we suggest changes then:
+ - Make the required updates.
+ - Rebase your branch and force push to your GitHub repository (this will update your Pull Request):
+
+That's it! Thank you for your contribution!
+
+## ✅ Coding Rules
+
+We generally follow the [Google Python style guide](http://google.github.io/styleguide/pyguide.html).
+
+## 📝 Contributor License Agreement
+
+To avoid any potential legal problems later, it is sadly necessary to sign a [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md). That can be done literally with the push of a button.
+
+---
+
+_This guide was inspired by the [transformers-interpret](https://github.com/cdpierse/transformers-interpret/blob/master/CONTRIBUTING.md),
+[Haystack](https://github.com/deepset-ai/haystack/blob/master/CONTRIBUTING.md) and [n8n](https://github.com/n8n-io/n8n/blob/master/CONTRIBUTOR_LICENSE_AGREEMENT.md)_
diff --git a/obsei_module/CONTRIBUTOR_LICENSE_AGREEMENT.md b/obsei_module/CONTRIBUTOR_LICENSE_AGREEMENT.md
new file mode 100644
index 0000000000000000000000000000000000000000..8b4784f57938ed30cbc0de319b9c90df121b3632
--- /dev/null
+++ b/obsei_module/CONTRIBUTOR_LICENSE_AGREEMENT.md
@@ -0,0 +1,3 @@
+# Obsei Contributor License Agreement
+
+I give Obsei's Creator permission to license my contributions to any terms they like. I am giving them this license in order to make it possible for them to accept my contributions into their project.
\ No newline at end of file
diff --git a/obsei_module/Dockerfile b/obsei_module/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b16cbfd7580a3c384e93b690c80a8e4812d1a57f
--- /dev/null
+++ b/obsei_module/Dockerfile
@@ -0,0 +1,38 @@
+# This is Docker file to Obsei SDK with dependencies installed
+FROM python:3.10-slim-bullseye
+
+RUN useradd --create-home user
+WORKDIR /home/user
+
+# env variable
+ENV PIP_DISABLE_PIP_VERSION_CHECK 1
+ENV PIP_NO_CACHE_DIR 1
+ENV WORKFLOW_SCRIPT '/home/user/obsei/process_workflow.py'
+ENV OBSEI_CONFIG_PATH ""
+ENV OBSEI_CONFIG_FILENAME ""
+
+
+# Hack to install jre on debian
+RUN mkdir -p /usr/share/man/man1
+
+# install few required tools
+RUN apt-get update && apt-get install -y --no-install-recommends curl git pkg-config cmake libncurses5 g++ \
+ && apt-get clean autoclean && apt-get autoremove -y \
+ && rm -rf /var/lib/{apt,dpkg,cache,log}/
+
+# install as a package
+COPY pyproject.toml README.md /home/user/
+RUN pip install --upgrade pip
+
+# copy README
+COPY README.md /home/user/
+
+# copy code
+COPY obsei /home/user/obsei
+RUN pip install -e .[all]
+
+
+USER user
+
+# cmd for running the API
+CMD ["sh", "-c", "python ${WORKFLOW_SCRIPT}"]
diff --git a/obsei_module/LICENSE b/obsei_module/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..fecb6d71f505d183b3e4f5bbda806637c660d0f1
--- /dev/null
+++ b/obsei_module/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2020-2022 Oraika Technologies Private Limited (https://www.oraika.com)
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/obsei_module/MANIFEST.in b/obsei_module/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..84c71247ce333d3b19e1265f4da3fd130972bc35
--- /dev/null
+++ b/obsei_module/MANIFEST.in
@@ -0,0 +1,3 @@
+include LICENSE
+include requirements.txt
+include README.md
diff --git a/obsei_module/README.md b/obsei_module/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..48c602f538183d2bba5f019e2c81cb32946cb71b
--- /dev/null
+++ b/obsei_module/README.md
@@ -0,0 +1,1067 @@
+
+
+---
+
+![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/obsei_flow.gif)
+
+---
+
+
+Note: Obsei is still in the alpha stage, so use it carefully in production. Also, as it is constantly under development, the master branch may contain many breaking changes. Please use a released version.
+
+
+---
+
+**Obsei** (pronounced "Ob see" | /əb-'sē/) is an open-source, low-code, AI powered automation tool. _Obsei_ consists of -
+
+- **Observer**: Collect unstructured data from various sources like tweets from Twitter, Subreddit comments on Reddit, page post's comments from Facebook, App Stores reviews, Google reviews, Amazon reviews, News, Website, etc.
+- **Analyzer**: Analyze unstructured data collected with various AI tasks like classification, sentiment analysis, translation, PII, etc.
+- **Informer**: Send analyzed data to various destinations like ticketing platforms, data storage, dataframe, etc so that the user can take further actions and perform analysis on the data.
+
+All the Observers can store their state in databases (Sqlite, Postgres, MySQL, etc.), making Obsei suitable for scheduled jobs or serverless applications.
+
+![Obsei diagram](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/Obsei_diagram.png)
+
+### Future direction -
+
+- Text, Image, Audio, Documents and Video oriented workflows
+- Collect data from every possible private and public channels
+- Add every possible workflow to an AI downstream application to automate manual cognitive workflows
+
+## Use cases
+
+_Obsei_ use cases include the following, but are not limited to -
+
+- Social listening: Listening about social media posts, comments, customer feedback, etc.
+- Alerting/Notification: To get auto-alerts for events such as customer complaints, qualified sales leads, etc.
+- Automatic customer issue creation based on customer complaints on Social Media, Email, etc.
+- Automatic assignment of proper tags to tickets based on the content of the customer complaint, for example login issue, sign-up issue, delivery issue, etc.
+- Extraction of deeper insights from feedback on various platforms
+- Market research
+- Creation of dataset for various AI tasks
+- Many more based on creativity 💡
+
+## Installation
+
+### Prerequisite
+
+Install the following (if not present already) -
+
+- Install [Python 3.7+](https://www.python.org/downloads/)
+- Install [PIP](https://pip.pypa.io/en/stable/installing/)
+
+### Install Obsei
+
+You can install Obsei either via PIP or Conda based on your preference.
+To install latest released version -
+
+```shell
+pip install obsei[all]
+```
+
+Install from master branch (if you want to try the latest features) -
+
+```shell
+git clone https://github.com/obsei/obsei.git
+cd obsei
+pip install --editable .[all]
+```
+
+Note: the `all` option will install all the dependencies, which might not be needed for your workflow. Alternatively,
+the following options are available to install minimal dependencies as per need -
+ - `pip install obsei[source]`: To install dependencies related to all observers
+ - `pip install obsei[sink]`: To install dependencies related to all informers
+ - `pip install obsei[analyzer]`: To install dependencies related to all analyzers, it will install pytorch as well
+ - `pip install obsei[twitter-api]`: To install dependencies related to Twitter observer
+ - `pip install obsei[google-play-scraper]`: To install dependencies related to Play Store review scrapper observer
+ - `pip install obsei[google-play-api]`: To install dependencies related to Google official play store review API based observer
+ - `pip install obsei[app-store-scraper]`: To install dependencies related to Apple App Store review scrapper observer
+ - `pip install obsei[reddit-scraper]`: To install dependencies related to Reddit post and comment scrapper observer
+ - `pip install obsei[reddit-api]`: To install dependencies related to Reddit official api based observer
+ - `pip install obsei[pandas]`: To install dependencies related to TSV/CSV/Pandas based observer and informer
+ - `pip install obsei[google-news-scraper]`: To install dependencies related to Google news scrapper observer
+ - `pip install obsei[facebook-api]`: To install dependencies related to Facebook official page post and comments api based observer
+ - `pip install obsei[atlassian-api]`: To install dependencies related to Jira official api based informer
+ - `pip install obsei[elasticsearch]`: To install dependencies related to elasticsearch informer
+ - `pip install obsei[slack-api]`: To install dependencies related to Slack official api based informer
+
+You can also mix multiple dependencies together in a single installation command. For example, to install dependencies
+for the Twitter observer, all analyzers, and the Slack informer, use the following command -
+```shell
+pip install obsei[twitter-api, analyzer, slack-api]
+```
+
+
+## How to use
+
+Expand the following steps and create a workflow -
+
+Step 1: Configure Source/Observer
+
+
+
Twitter
+
+```python
+from obsei.source.twitter_source import TwitterCredentials, TwitterSource, TwitterSourceConfig
+
+# initialize twitter source config
+source_config = TwitterSourceConfig(
+ keywords=["issue"], # Keywords, @user or #hashtags
+ lookup_period="1h", # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+ cred_info=TwitterCredentials(
+ # Enter your twitter consumer key and secret. Get it from https://developer.twitter.com/en/apply-for-access
+ consumer_key="",
+ consumer_secret="",
+ bearer_token='',
+ )
+)
+
+# initialize tweets retriever
+source = TwitterSource()
+```
+
+
+
+
+
+
Youtube Scrapper
+
+```python
+from obsei.source.youtube_scrapper import YoutubeScrapperSource, YoutubeScrapperConfig
+
+# initialize Youtube source config
+source_config = YoutubeScrapperConfig(
+ video_url="https://www.youtube.com/watch?v=uZfns0JIlFk", # Youtube video URL
+ fetch_replies=True, # Fetch replies to comments
+ max_comments=10, # Total number of comments and replies to fetch
+ lookup_period="1Y", # Lookup period from current time, format: `<number><d|h|m|M|Y>` (day|hour|minute|month|year)
+)
+
+# initialize Youtube comments retriever
+source = YoutubeScrapperSource()
+```
+
+
+
+
+
+
Facebook
+
+```python
+from obsei.source.facebook_source import FacebookCredentials, FacebookSource, FacebookSourceConfig
+
+# initialize facebook source config
+source_config = FacebookSourceConfig(
+ page_id="110844591144719", # Facebook page id, for example this one for Obsei
+ lookup_period="1h", # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+ cred_info=FacebookCredentials(
+ # Enter your facebook app_id, app_secret and long_term_token. Get it from https://developers.facebook.com/apps/
+ app_id="",
+ app_secret="",
+ long_term_token="",
+ )
+)
+
+# initialize facebook post comments retriever
+source = FacebookSource()
+```
+
+
+
+
+
+
Email
+
+```python
+from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource
+
+# initialize email source config
+source_config = EmailConfig(
+ # List of IMAP servers for most commonly used email providers
+ # https://www.systoolsgroup.com/imap/
+ # Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
+ # https://myaccount.google.com/lesssecureapps?pli=1
+ # Also enable IMAP access -
+ # https://mail.google.com/mail/u/0/#settings/fwdandpop
+ imap_server="imap.gmail.com", # Enter IMAP server
+ cred_info=EmailCredInfo(
+ # Enter your email account username and password
+ username="",
+ password=""
+ ),
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+)
+
+# initialize email retriever
+source = EmailSource()
+```
+
+
+
+
+
+
Google Maps Reviews Scrapper
+
+```python
+from obsei.source.google_maps_reviews import OSGoogleMapsReviewsSource, OSGoogleMapsReviewsConfig
+
+# initialize Outscrapper Maps review source config
+source_config = OSGoogleMapsReviewsConfig(
+ # Collect API key from https://outscraper.com/
+ api_key="",
+ # Enter Google Maps link or place id
+ # For example below is for the "Taj Mahal"
+ queries=["https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"],
+ number_of_reviews=10,
+)
+
+
+# initialize Outscrapper Maps review retriever
+source = OSGoogleMapsReviewsSource()
+```
+
+
+
+
+
+
AppStore Reviews Scrapper
+
+```python
+from obsei.source.appstore_scrapper import AppStoreScrapperConfig, AppStoreScrapperSource
+
+# initialize app store source config
+source_config = AppStoreScrapperConfig(
+ # Need two parameters app_id and country.
+ # `app_id` can be found at the end of the url of app in app store.
+ # For example - https://apps.apple.com/us/app/xcode/id497799835
+ # `497799835` is the app_id for xcode and `us` is the country.
+ countries=["us"],
+ app_id="497799835",
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+)
+
+
+# initialize app store reviews retriever
+source = AppStoreScrapperSource()
+```
+
+
+
+
+
+
Play Store Reviews Scrapper
+
+```python
+from obsei.source.playstore_scrapper import PlayStoreScrapperConfig, PlayStoreScrapperSource
+
+# initialize play store source config
+source_config = PlayStoreScrapperConfig(
+ # Need two parameters package_name and country.
+ # `package_name` can be found at the end of the url of app in play store.
+ # For example - https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en&gl=US
+ # `com.google.android.gm` is the package_name for Gmail and `us` is the country.
+ countries=["us"],
+ package_name="com.google.android.gm",
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+)
+
+# initialize play store reviews retriever
+source = PlayStoreScrapperSource()
+```
+
+
+
+
+
+
Reddit
+
+```python
+from obsei.source.reddit_source import RedditConfig, RedditSource, RedditCredInfo
+
+# initialize reddit source config
+source_config = RedditConfig(
+ subreddits=["wallstreetbets"], # List of subreddits
+ # Reddit account username and password
+ # You can also enter reddit client_id and client_secret or refresh_token
+ # Create credential at https://www.reddit.com/prefs/apps
+ # Also refer https://praw.readthedocs.io/en/latest/getting_started/authentication.html
+ # Currently Password Flow, Read Only Mode and Saved Refresh Token Mode are supported
+ cred_info=RedditCredInfo(
+ username="",
+ password=""
+ ),
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+)
+
+# initialize reddit retriever
+source = RedditSource()
+```
+
+
+
+
+
+
Reddit Scrapper
+
+Note: Reddit heavily rate-limits scrapers, hence use it to fetch a small amount of data over a long period
+
+```python
+from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource
+
+# initialize reddit scrapper source config
+source_config = RedditScrapperConfig(
+ # Reddit subreddit, search etc rss url. For proper url refer following link -
+ # Refer https://www.reddit.com/r/pathogendavid/comments/tv8m9/pathogendavids_guide_to_rss_and_reddit/
+ url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
+ lookup_period="1h" # Lookup period from current time, format: `<number><d|h|m>` (day|hour|minute)
+)
+
+# initialize reddit retriever
+source = RedditScrapperSource()
+```
+
+
+
+
+
+
Google News
+
+```python
+from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource
+
+# initialize Google News source config
+source_config = GoogleNewsConfig(
+ query='bitcoin',
+ max_results=5,
+ # To fetch full article text enable `fetch_article` flag
+ # By default google news gives title and highlight
+ fetch_article=True,
+ # proxy='http://127.0.0.1:8080'
+)
+
+# initialize Google News retriever
+source = GoogleNewsSource()
+```
+
+
+
Pandas DataFrame
+
+```python
+import pandas as pd
+from obsei.source.pandas_source import PandasSource, PandasSourceConfig
+
+# Initialize your Pandas DataFrame from your sources like csv, excel, sql etc
+# In following example we are reading csv which have two columns title and text
+csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
+dataframe = pd.read_csv(csv_file)
+
+# initialize pandas sink config
+sink_config = PandasSourceConfig(
+ dataframe=dataframe,
+ include_columns=["score"],
+ text_columns=["name", "degree"],
+)
+
+# initialize pandas sink
+sink = PandasSource()
+```
+
+
+
+
+
+
+
+
+
+Step 2: Configure Analyzer
+
+Note: To run transformers in an offline mode, check [transformers offline mode](https://huggingface.co/transformers/installation.html#offline-mode).
+
+
Some analyzer support GPU and to utilize pass device parameter.
+List of possible values of device parameter (default value auto):
+
+
auto: GPU (cuda:0) will be used if available otherwise CPU will be used
+
cpu: CPU will be used
+
cuda:{id} - GPU will be used with provided CUDA device id
+
+
+
+
+
Text Classification
+
+Text classification: Classify text into user provided categories.
+
+```python
+from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer
+
+# initialize classification analyzer config
+# It can also detect sentiments if "positive" and "negative" labels are added.
+analyzer_config=ClassificationAnalyzerConfig(
+ labels=["service", "delay", "performance"],
+)
+
+# initialize classification analyzer
+# For supported models refer https://huggingface.co/models?filter=zero-shot-classification
+text_analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="typeform/mobilebert-uncased-mnli",
+ device="auto"
+)
+```
+
+
+
+
+
+
Sentiment Analyzer
+
+Sentiment Analyzer: Detect the sentiment of the text. Text classification can also perform sentiment analysis, but if you don't want to use a heavy-duty NLP model, then use the less resource-hungry, dictionary-based Vader sentiment detector.
+
+```python
+from obsei.analyzer.sentiment_analyzer import VaderSentimentAnalyzer
+
+# Vader does not need any configuration settings
+analyzer_config=None
+
+# initialize vader sentiment analyzer
+text_analyzer = VaderSentimentAnalyzer()
+```
+
+
+
+
+
+
NER Analyzer
+
+NER (Named-Entity Recognition) Analyzer: Extract information and classify named entities mentioned in text into pre-defined categories such as person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc
+
+```python
+from obsei.analyzer.ner_analyzer import NERAnalyzer
+
+# NER analyzer does not need configuration settings
+analyzer_config=None
+
+# initialize ner analyzer
+# For supported models refer https://huggingface.co/models?filter=token-classification
+text_analyzer = NERAnalyzer(
+ model_name_or_path="elastic/distilbert-base-cased-finetuned-conll03-english",
+ device = "auto"
+)
+```
+
+
+
PII Anonymizer
+
+```python
+from obsei.analyzer.pii_analyzer import PresidioEngineConfig, PresidioModelConfig, \
+ PresidioPIIAnalyzer, PresidioPIIAnalyzerConfig
+
+# initialize pii analyzer's config
+analyzer_config = PresidioPIIAnalyzerConfig(
+ # Whether to return only pii analysis or anonymize text
+ analyze_only=False,
+ # Whether to return detail information about anonymization decision
+ return_decision_process=True
+)
+
+# initialize pii analyzer
+analyzer = PresidioPIIAnalyzer(
+ engine_config=PresidioEngineConfig(
+ # spacy and stanza nlp engines are supported
+ # For more info refer
+ # https://microsoft.github.io/presidio/analyzer/developing_recognizers/#utilize-spacy-or-stanza
+ nlp_engine_name="spacy",
+ # Update desired spacy model and language
+ models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")]
+ )
+)
+```
+
+
+
+
+
+
Dummy Analyzer
+
+Dummy Analyzer: Does nothing. It is simply used for transforming the input (TextPayload) to the output (TextPayload) and adding the user-supplied dummy data.
+
+```python
+from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
+
+# initialize dummy analyzer's configuration settings
+analyzer_config = DummyAnalyzerConfig()
+
+# initialize dummy analyzer
+analyzer = DummyAnalyzer()
+```
+
+
+
+
+
+
+
+
+
+Step 3: Configure Sink/Informer
+
+
+
Slack
+
+```python
+from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
+
+# initialize slack sink config
+sink_config = SlackSinkConfig(
+ # Provide slack bot/app token
+ # For more detail refer https://slack.com/intl/en-de/help/articles/215770388-Create-and-regenerate-API-tokens
+ slack_token="",
+ # To get channel id refer https://stackoverflow.com/questions/40940327/what-is-the-simplest-way-to-find-a-slack-team-id-and-a-channel-id
+ channel_id="C01LRS6CT9Q"
+)
+
+# initialize slack sink
+sink = SlackSink()
+```
+
+
+
+
+
+
Zendesk
+
+```python
+from obsei.sink.zendesk_sink import ZendeskSink, ZendeskSinkConfig, ZendeskCredInfo
+
+# initialize zendesk sink config
+sink_config = ZendeskSinkConfig(
+ # provide zendesk domain
+ domain="zendesk.com",
+ # provide subdomain if you have one
+ subdomain=None,
+ # Enter zendesk user details
+ cred_info=ZendeskCredInfo(
+ email="",
+ password=""
+ )
+)
+
+# initialize zendesk sink
+sink = ZendeskSink()
+```
+
+
+
+
+
+
Jira
+
+```python
+from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
+
+# For testing purpose you can start jira server locally
+# Refer https://developer.atlassian.com/server/framework/atlassian-sdk/atlas-run-standalone/
+
+# initialize Jira sink config
+sink_config = JiraSinkConfig(
+ url="http://localhost:2990/jira", # Jira server url
+ # Jira username & password for user who have permission to create issue
+ username="",
+ password="",
+ # Which type of issue to be created
+ # For more information refer https://support.atlassian.com/jira-cloud-administration/docs/what-are-issue-types/
+ issue_type={"name": "Task"},
+ # Under which project issue to be created
+ # For more information refer https://support.atlassian.com/jira-software-cloud/docs/what-is-a-jira-software-project/
+ project={"key": "CUS"},
+)
+
+# initialize Jira sink
+sink = JiraSink()
+```
+
+
+
+
+
+
ElasticSearch
+
+```python
+from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
+
+# For testing purpose you can start Elasticsearch server locally via docker
+# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:8.5.0`
+
+# initialize Elasticsearch sink config
+sink_config = ElasticSearchSinkConfig(
+ # Elasticsearch server
+ hosts="http://localhost:9200",
+ # Index name, it will create if not exist
+ index_name="test",
+)
+
+# initialize Elasticsearch sink
+sink = ElasticSearchSink()
+```
+
+
+
+
+
+
Http
+
+```python
+from obsei.sink.http_sink import HttpSink, HttpSinkConfig
+
+# For testing purpose you can create mock http server via postman
+# For more details refer https://learning.postman.com/docs/designing-and-developing-your-api/mocking-data/setting-up-mock/
+
+# initialize http sink config (Currently only POST call is supported)
+sink_config = HttpSinkConfig(
+ # provide http server url
+ url="https://localhost:8080/api/path",
+ # Here you can add headers you would like to pass with request
+ headers={
+ "Content-type": "application/json"
+ }
+)
+
+# To modify or converting the payload, create convertor class
+# Refer obsei.sink.dailyget_sink.PayloadConvertor for example
+
+# initialize http sink
+sink = HttpSink()
+```
+
+
+
+
+
+
+Step 4: Join and create workflow
+
+`source` will fetch data from the selected source, then feed it to the `analyzer` for processing, whose output we feed into a `sink` to get notified at that sink.
+
+```python
+# Uncomment if you want logger
+# import logging
+# import sys
+# logger = logging.getLogger(__name__)
+# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# This will fetch information from configured source ie twitter, app store etc
+source_response_list = source.lookup(source_config)
+
+# Uncomment if you want to log source response
+# for idx, source_response in enumerate(source_response_list):
+# logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+# This will execute analyzer (Sentiment, classification etc) on source data with provided analyzer_config
+analyzer_response_list = text_analyzer.analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=analyzer_config
+)
+
+# Uncomment if you want to log analyzer response
+# for idx, an_response in enumerate(analyzer_response_list):
+# logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+# Analyzer output added to segmented_data
+# Uncomment to log it
+# for idx, an_response in enumerate(analyzer_response_list):
+# logger.info(f"analyzed_data#'{idx}'='{an_response.segmented_data.__dict__}'")
+
+# This will send analyzed output to configure sink ie Slack, Zendesk etc
+sink_response_list = sink.send_data(analyzer_response_list, sink_config)
+
+# Uncomment if you want to log sink response
+# for sink_response in sink_response_list:
+# if sink_response is not None:
+# logger.info(f"sink_response='{sink_response}'")
+```
+
+
+
+Step 5: Execute workflow
+Copy the code snippets from Steps 1 to 4 into a python file, for example example.py and execute the following command -
+
+```shell
+python example.py
+```
+
+
+
+## Demo
+
+We have a minimal [streamlit](https://streamlit.io/) based UI that you can use to test Obsei.
+
+![Screenshot](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/obsei-ui-demo.png)
+
+### Watch UI demo video
+
+[![Introductory and demo video](https://img.youtube.com/vi/GTF-Hy96gvY/2.jpg)](https://www.youtube.com/watch?v=GTF-Hy96gvY)
+
+Check demo at [![](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/obsei/obsei-demo)
+
+(**Note**: Sometimes the Streamlit demo might not work due to rate limiting, use the docker image (locally) in such cases.)
+
+To test locally, just run
+
+```
+docker run -d --name obsei-ui -p 8501:8501 obsei/obsei-ui-demo
+
+# You can find the UI at http://localhost:8501
+```
+
+**To run Obsei workflow easily using GitHub Actions (no sign ups and cloud hosting required), refer to this [repo](https://github.com/obsei/demo-workflow-action)**.
+
+## Companies/Projects using Obsei
+
+Here are some companies/projects (alphabetical order) using Obsei. To add your company/project to the list, please raise a PR or contact us via [email](mailto:contact@obsei.com).
+
+- [Oraika](https://www.oraika.com): Contextually understand customer feedback
+- [1Page](https://www.get1page.com/): Giving a better context in meetings and calls
+- [Spacepulse](http://spacepulse.in/): The operating system for spaces
+- [Superblog](https://superblog.ai/): A blazing fast alternative to WordPress and Medium
+- [Zolve](https://zolve.com/): Creating a financial world beyond borders
+- [Utilize](https://www.utilize.app/): No-code app builder for businesses with a deskless workforce
+
+## Articles
+
+
Observe app reviews from Google play store, PreProcess text via various text cleaning functions, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive
+
+
+
PlayStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive
Observe app reviews from Apple app store, PreProcess text via various text cleaning function, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive
+
+
+
AppStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive
Observe news article from Google news, PreProcess text via various text cleaning function, Analyze them via performing text classification while splitting text in small chunks and later computing final inference using given formula
+
+
+
Google News → Text Cleaner → Text Splitter → Classification → Inference Aggregator
+
+💡Tips: Handle large text classification via Obsei
+
+![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/Long_Text_Classification.gif)
+
+
+
+## Documentation
+
+For detailed installation instructions, usages and examples, refer to our [documentation](https://obsei.github.io/obsei/).
+
+## Support and Release Matrix
+
+
+
+
+
+
Linux
+
Mac
+
Windows
+
Remark
+
+
+
+
+
Tests
+
✅
+
✅
+
✅
+
Low Coverage as difficult to test 3rd party libs
+
+
+
PIP
+
✅
+
✅
+
✅
+
Fully Supported
+
+
+
Conda
+
❌
+
❌
+
❌
+
Not Supported
+
+
+
+
+## Discussion forum
+
+Discussion about _Obsei_ can be done at [community forum](https://github.com/obsei/obsei/discussions)
+
+## Changelogs
+
+Refer [releases](https://github.com/obsei/obsei/releases) for changelogs
+
+## Security Issue
+
+For any security issue please contact us via [email](mailto:contact@oraika.com)
+
+## Stargazers over time
+
+[![Stargazers over time](https://starchart.cc/obsei/obsei.svg)](https://starchart.cc/obsei/obsei)
+
+## Maintainers
+
+This project is being maintained by [Oraika Technologies](https://www.oraika.com). [Lalit Pagaria](https://github.com/lalitpagaria) and [Girish Patel](https://github.com/GirishPatel) are maintainers of this project.
+
+## License
+
+- Copyright holder: [Oraika Technologies](https://www.oraika.com)
+- Overall Apache 2.0 and you can read [License](https://github.com/obsei/obsei/blob/master/LICENSE) file.
+- Multiple other secondary permissive or weak copyleft licenses (LGPL, MIT, BSD etc.) for third-party components refer [Attribution](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
+- To keep the project commercially friendly, we avoid including third-party components that have strong copyleft licenses (GPL, AGPL etc.) in the project.
+
+## Attribution
+
+This could not have been possible without these [open source softwares](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
+
+## Contribution
+
+First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
+Please refer our [Contribution Guideline](https://github.com/obsei/obsei/blob/master/CONTRIBUTING.md) and [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md).
+
+Thanks so much to all our contributors
+
+
+
+
diff --git a/obsei_module/SECURITY.md b/obsei_module/SECURITY.md
new file mode 100644
index 0000000000000000000000000000000000000000..40ce33e3996ab24222f9c236fe167128c507ed6e
--- /dev/null
+++ b/obsei_module/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+For any security issue please report it via [email](mailto:contact@oraika.com).
diff --git a/obsei_module/__init__.py b/obsei_module/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/__pycache__/__init__.cpython-311.pyc b/obsei_module/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1356d2f750a409d6caf759f2ea6baed2abfed9cf
Binary files /dev/null and b/obsei_module/__pycache__/__init__.cpython-311.pyc differ
diff --git a/obsei_module/_config.yml b/obsei_module/_config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b55420d431480b1c3f2d4515c45b47c2e0625df
--- /dev/null
+++ b/obsei_module/_config.yml
@@ -0,0 +1,9 @@
+theme: jekyll-theme-primer
+markdown: CommonMarkGhPages
+commonmark:
+ options: ["UNSAFE", "SMART", "FOOTNOTES"]
+ extensions: ["strikethrough", "autolink", "table", "tagfilter"]
+title: "Obsei: An open-source low-code AI powered automation tool"
+description: "Obsei is an open-source low-code AI powered automation tool"
+
+google_analytics: G-0E2FTKBK4T
diff --git a/obsei_module/_includes/head-custom-google-analytics.html b/obsei_module/_includes/head-custom-google-analytics.html
new file mode 100644
index 0000000000000000000000000000000000000000..360ca261d4caea0b2597b4d53b2e95605b341b86
--- /dev/null
+++ b/obsei_module/_includes/head-custom-google-analytics.html
@@ -0,0 +1,9 @@
+
+
+
diff --git a/obsei_module/binder/requirements.txt b/obsei_module/binder/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c436e37c0702f46f8deb36b9deed2d3fb8491296
--- /dev/null
+++ b/obsei_module/binder/requirements.txt
@@ -0,0 +1,2 @@
+git+https://github.com/obsei/obsei@master#egg=obsei[all]
+trafilatura
diff --git a/obsei_module/example/app_store_scrapper_example.py b/obsei_module/example/app_store_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcbf9bb1a4a24527319dda4b130a41f7cb12f549
--- /dev/null
+++ b/obsei_module/example/app_store_scrapper_example.py
@@ -0,0 +1,41 @@
"""Example: scrape recent Apple App Store reviews and zero-shot classify them."""
import logging
import sys
from datetime import datetime, timedelta

import pytz

from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer
from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.appstore_scrapper import (
    AppStoreScrapperConfig,
    AppStoreScrapperSource,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Fetch reviews posted during the last 5 days.
# BUG FIX: datetime.utcnow() returns a *naive* datetime and astimezone()
# interprets a naive datetime as local time, which skews the lookup window on
# non-UTC machines. datetime.now(tz=...) yields a correct aware UTC timestamp.
since_time = datetime.now(tz=pytz.utc) - timedelta(days=5)
source_config = AppStoreScrapperConfig(
    # Store URL of the app whose reviews should be scraped
    app_url='https://apps.apple.com/us/app/gmail-email-by-google/id422689480',
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
    max_count=10,
)

source = AppStoreScrapperSource()

# Small MNLI model; device="auto" picks GPU when available.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Score each review against the candidate labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["interface", "slow", "battery"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/example/daily_get_example.py b/obsei_module/example/daily_get_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b8209b21f1113035aa7f45a3b419e77fbac73e2
--- /dev/null
+++ b/obsei_module/example/daily_get_example.py
@@ -0,0 +1,77 @@
"""Example: classify tweets matching a query and push results to the DailyGet sink.

Required environment variables: DAILYGET_URL, DAILYGET_PARTNER_ID,
DAILYGET_CONSUMER_NUMBER, DAILYGET_QUERY, DAILYGET_LOOKUP_PERIOD.
Twitter credentials are presumably picked up from the environment by
TwitterSourceConfig — TODO confirm against obsei.source.twitter_source.
"""
import logging
import os
import sys
from pathlib import Path

from obsei.sink.dailyget_sink import DailyGetSink, DailyGetSinkConfig
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Sink configuration: where and on whose behalf analyzed tweets are delivered.
sink_config = DailyGetSinkConfig(
    url=os.environ["DAILYGET_URL"],
    partner_id=os.environ["DAILYGET_PARTNER_ID"],
    consumer_phone_number=os.environ["DAILYGET_CONSUMER_NUMBER"],
    source_information="Twitter " + os.environ["DAILYGET_QUERY"],
    # base_payload is merged into every outgoing request body.
    base_payload={
        "partnerId": os.environ["DAILYGET_PARTNER_ID"],
        "consumerPhoneNumber": os.environ["DAILYGET_CONSUMER_NUMBER"],
    },
)

# NOTE(review): dir_path is computed but never used — candidate for removal.
dir_path = Path(__file__).resolve().parent.parent
source_config = TwitterSourceConfig(
    keywords=[os.environ["DAILYGET_QUERY"]],
    lookup_period=os.environ["DAILYGET_LOOKUP_PERIOD"],
    # Fields/expansions requested from the Twitter API for each tweet.
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)

source = TwitterSource()
sink = DailyGetSink()
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
    # model_name_or_path="joeddav/xlm-roberta-large-xnli",
)

# 1) Fetch tweets matching the configured query.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# 2) Zero-shot classify each tweet against the support-issue labels below.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=[
            "service",
            "delay",
            "tracking",
            "no response",
            "missing items",
            "delivery",
            "mask",
        ],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

# HTTP Sink
# 3) Deliver the analyzed payloads; send_data may yield None entries.
sink_response_list = sink.send_data(analyzer_response_list, sink_config)
for sink_response in sink_response_list:
    if sink_response is not None:
        logger.info(f"sink_response='{sink_response.__dict__}'")
diff --git a/obsei_module/example/elasticsearch_example.py b/obsei_module/example/elasticsearch_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..81bc7594ab85cf66d6259d7b41c15ebf12c473fc
--- /dev/null
+++ b/obsei_module/example/elasticsearch_example.py
@@ -0,0 +1,69 @@
"""Example: classify tweets mentioning a handle and index results into Elasticsearch."""
import logging
import sys
from pathlib import Path

from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# NOTE(review): dir_path is computed but never used — candidate for removal.
dir_path = Path(__file__).resolve().parent.parent
source_config = TwitterSourceConfig(
    keywords="@Handle",  # replace with the Twitter handle to monitor
    lookup_period="1h",  # 1 Hour
    # Fields/expansions requested from the Twitter API for each tweet.
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)

source = TwitterSource()
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
)

# Start Elasticsearch server locally
# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2`
sink_config = ElasticSearchSinkConfig(
    host="localhost",
    port=9200,
    index_name="test",
)

# 1) Fetch tweets, 2) classify them, 3) index the results.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=[
            "service",
            "delay",
            "tracking",
            "no response",
            "missing items",
            "delivery",
            "mask",
        ],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

sink = ElasticSearchSink()
sink_response = sink.send_data(analyzer_response_list, sink_config)
logger.info(f"sink_response='{sink_response}'")
diff --git a/obsei_module/example/email_source_example.py b/obsei_module/example/email_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..414819c8c56a5de328d7c7dbe694a5d9d5f4f2ef
--- /dev/null
+++ b/obsei_module/example/email_source_example.py
@@ -0,0 +1,36 @@
"""Example: fetch recent emails from an IMAP mailbox and log them."""
import logging
import os
import sys
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Fetch mails received during the last 10 hours.
# BUG FIX: datetime.utcnow() returns a *naive* datetime and astimezone()
# interprets a naive datetime as local time, which skews the lookup window on
# non-UTC machines. datetime.now(tz=...) yields a correct aware UTC timestamp.
since_time = datetime.now(tz=pytz.utc) - timedelta(hours=10)

# List of IMAP servers for most commonly used email providers
# https://www.systoolsgroup.com/imap/
# Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
# https://myaccount.google.com/lesssecureapps?pli=1
# Also enable IMAP access -
# https://mail.google.com/mail/u/0/#settings/fwdandpop
source_config = EmailConfig(
    imap_server="imap.gmail.com",
    cred_info=EmailCredInfo(
        # It will fetch username and password from environment variable
        username=os.environ.get("email_username"),
        password=os.environ.get("email_password"),
    ),
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = EmailSource()
source_response_list = source.lookup(source_config)

for source_response in source_response_list:
    logger.info(source_response.__dict__)
diff --git a/obsei_module/example/facebook_example.py b/obsei_module/example/facebook_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..207e1eb005288648bc2c67f15150496e3fd66ab9
--- /dev/null
+++ b/obsei_module/example/facebook_example.py
@@ -0,0 +1,19 @@
"""Example: fetch recent posts from a Facebook page and log them."""
import logging
import sys

from obsei.source.facebook_source import FacebookSource, FacebookSourceConfig

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Look back two months on the configured page id.
source_config = FacebookSourceConfig(page_id="110844591144719", lookup_period="2M")
source = FacebookSource()
source_response_list = source.lookup(source_config)

# First dump the full payload of every post...
logger.info("DETAILS:")
for response in source_response_list:
    logger.info(response)

# ...then only the extracted text of each post.
logger.info("TEXT:")
for response in source_response_list:
    logger.info(response.processed_text)
diff --git a/obsei_module/example/google_news_example.py b/obsei_module/example/google_news_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..40277f61d7f7b37f62154dd84ed7bb2003a35e9a
--- /dev/null
+++ b/obsei_module/example/google_news_example.py
@@ -0,0 +1,58 @@
"""Example: fetch Google News articles (with and without full text) and classify them."""
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource

# Only fetch title and highlight
source_config_without_full_text = GoogleNewsConfig(
    query="ai",
    max_results=150,
    # Explicit date window instead of a relative lookup_period.
    after_date='2023-12-01',
    before_date='2023-12-31',
)

# Fetch full news article
source_config_with_full_text = GoogleNewsConfig(
    query="ai",
    max_results=5,
    fetch_article=True,  # download and extract the article body, not just the headline
    lookup_period="1d",
    # proxy="http://127.0.0.1:8080"
)

source = GoogleNewsSource()

# Shared zero-shot labels applied to both article sets.
analyzer_config = ClassificationAnalyzerConfig(
    labels=["buy", "sell", "going up", "going down"],
)

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

news_articles_without_full_text = source.lookup(source_config_without_full_text)

news_articles_with_full_text = source.lookup(source_config_with_full_text)


analyzer_responses_without_full_text = text_analyzer.analyze_input(
    source_response_list=news_articles_without_full_text,
    analyzer_config=analyzer_config,
)

analyzer_responses_with_full_text = text_analyzer.analyze_input(
    source_response_list=news_articles_with_full_text, analyzer_config=analyzer_config
)

# Dump raw articles followed by their classification results.
for article in news_articles_without_full_text:
    print(article.__dict__)

for response in analyzer_responses_without_full_text:
    print(response.__dict__)

for article in news_articles_with_full_text:
    print(article.__dict__)

for response in analyzer_responses_with_full_text:
    print(response.__dict__)
diff --git a/obsei_module/example/jira_example.py b/obsei_module/example/jira_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..00b59600731b689da9ab57bffd71285e729754e3
--- /dev/null
+++ b/obsei_module/example/jira_example.py
@@ -0,0 +1,77 @@
# Jira Sink
"""Example: classify tweets about issues and create Jira tickets from them.

Requires env vars twitter_consumer_key and twitter_consumer_secret.
"""
import logging
import os
import sys
from pathlib import Path

from pydantic import SecretStr

from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
from obsei.source.twitter_source import (
    TwitterCredentials,
    TwitterSource,
    TwitterSourceConfig,
)
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# NOTE(review): dir_path is computed but never used — candidate for removal.
dir_path = Path(__file__).resolve().parent.parent
source_config = TwitterSourceConfig(
    keywords=["facing issue"],  # search phrase for complaint-like tweets
    lookup_period="1h",
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
    cred_info=TwitterCredentials(
        consumer_key=SecretStr(os.environ["twitter_consumer_key"]),
        consumer_secret=SecretStr(os.environ["twitter_consumer_secret"]),
    ),
)

source = TwitterSource()

# To start jira server locally `atlas-run-standalone --product jira`
jira_sink_config = JiraSinkConfig(
    url="http://localhost:2990/jira",
    username=SecretStr("admin"),
    password=SecretStr("admin"),
    # Each analyzed tweet becomes a Task in project CUS.
    issue_type={"name": "Task"},
    project={"key": "CUS"},
)
jira_sink = JiraSink()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers"
)

# 1) Fetch tweets, 2) classify, 3) raise Jira tickets.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["service", "delay", "performance"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

sink_response_list = jira_sink.send_data(analyzer_response_list, jira_sink_config)
for sink_response in sink_response_list:
    if sink_response is not None:
        logger.info(f"sink_response='{sink_response}'")
diff --git a/obsei_module/example/maps_review_scrapper_example.py b/obsei_module/example/maps_review_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0633a025bb9fc15c933bd5a5a4058a0012e6392
--- /dev/null
+++ b/obsei_module/example/maps_review_scrapper_example.py
@@ -0,0 +1,22 @@
"""Example: pull Google Maps reviews for a place via the Outscraper-backed source."""
import logging
import sys

from obsei.source.google_maps_reviews import (
    OSGoogleMapsReviewsConfig,
    OSGoogleMapsReviewsSource,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Each query is a Maps place URL; number_of_reviews caps the fetch per place.
source_config = OSGoogleMapsReviewsConfig(
    api_key="",  # Get API key from https://outscraper.com/
    queries=[
        "https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"
    ],
    number_of_reviews=3,
)

source = OSGoogleMapsReviewsSource()

# Fetch the reviews and log each payload's attributes.
for review in source.lookup(source_config):
    logger.info(review.__dict__)
diff --git a/obsei_module/example/pandas_sink_example.py b/obsei_module/example/pandas_sink_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2d817ad0c369e4b81eda9f754f149c50c2875c8
--- /dev/null
+++ b/obsei_module/example/pandas_sink_example.py
@@ -0,0 +1,49 @@
"""Example: scrape Play Store reviews, classify them, and collect results in a DataFrame."""
import logging
import sys

from pandas import DataFrame

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.sink.pandas_sink import PandasSink, PandasSinkConfig
from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape up to 3 US reviews of the given package.
source_config = PlayStoreScrapperConfig(
    countries=["us"], package_name="com.apcoaconnect", max_count=3
)

source = PlayStoreScrapperSource()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

# initialize pandas sink config
# The sink appends analyzed rows into this (initially empty) DataFrame.
sink_config = PandasSinkConfig(dataframe=DataFrame())

# initialize pandas sink
sink = PandasSink()

source_response_list = source.lookup(source_config)

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["no parking", "registration issue", "app issue", "payment issue"],
    ),
)

# send_data returns the populated DataFrame.
dataframe = sink.send_data(
    analyzer_responses=analyzer_response_list, config=sink_config
)

print(dataframe.to_csv())
diff --git a/obsei_module/example/pandas_source_example.py b/obsei_module/example/pandas_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a82af3ac3cb46fd4b08de92b00b04754249511c
--- /dev/null
+++ b/obsei_module/example/pandas_source_example.py
@@ -0,0 +1,27 @@
"""Example: use a pandas DataFrame as an Obsei source."""
import pandas as pd

from obsei.source.pandas_source import (
    PandasSourceConfig,
    PandasSource,
)
import logging
import sys

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Any DataFrame works here (csv, excel, sql, ...); the sample csv below has
# two columns: title and text.
csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
dataframe = pd.read_csv(csv_file)

# "text" columns become the payload text; "title" is carried along as metadata.
source_config = PandasSourceConfig(
    dataframe=dataframe,
    include_columns=["title"],
    text_columns=["text"],
)
source = PandasSource()

source_response_list = source.lookup(source_config)
for idx, payload in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{payload.__dict__}'")
diff --git a/obsei_module/example/pii_analyzer_example.py b/obsei_module/example/pii_analyzer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..36ec4ff72c3f5221ccbc7c35d74897619ce69514
--- /dev/null
+++ b/obsei_module/example/pii_analyzer_example.py
@@ -0,0 +1,33 @@
"""Example: detect and anonymize PII in text using the Presidio-based analyzer."""
import logging
import sys

from obsei.payload import TextPayload
from obsei.analyzer.pii_analyzer import (
    PresidioEngineConfig,
    PresidioModelConfig,
    PresidioPIIAnalyzer,
    PresidioPIIAnalyzerConfig,
)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# analyze_only=False also anonymizes; return_decision_process exposes why
# each entity was flagged.
analyzer_config = PresidioPIIAnalyzerConfig(
    analyze_only=False, return_decision_process=True
)
analyzer = PresidioPIIAnalyzer(
    engine_config=PresidioEngineConfig(
        nlp_engine_name="spacy",
        models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")],
    )
)

text_to_anonymize = "His name is Mr. Jones and his phone number is 212-555-5555"

analyzer_results = analyzer.analyze_input(
    source_response_list=[TextPayload(processed_text=text_to_anonymize)],
    analyzer_config=analyzer_config,
)

for analyzer_result in analyzer_results:
    # BUG FIX: was `logging.info(...)` (root logger); use the module logger
    # configured above, consistent with the rest of the examples.
    logger.info(analyzer_result.to_dict())
diff --git a/obsei_module/example/play_store_reviews_example.py b/obsei_module/example/play_store_reviews_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d37669a7a4441ce69be05152c7dad7aad5edd538
--- /dev/null
+++ b/obsei_module/example/play_store_reviews_example.py
@@ -0,0 +1,4 @@
# TBD
# Placeholder example for the Play Store reviews (official API) source.
# Need proper service account file to test the changes :(
print("TBD")
diff --git a/obsei_module/example/playstore_scrapper_example.py b/obsei_module/example/playstore_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..00b1a5406f9c9785bd08262559edca0309832617
--- /dev/null
+++ b/obsei_module/example/playstore_scrapper_example.py
@@ -0,0 +1,40 @@
"""Example: scrape Google Play reviews by app URL and zero-shot classify them."""
import logging
import sys

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)

from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape at most 3 reviews of the app identified by this Play Store URL.
source_config = PlayStoreScrapperConfig(
    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
    max_count=3
)

source = PlayStoreScrapperSource()

# Small MNLI model; device="auto" picks GPU when available.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Score each review against the candidate labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["interface", "slow", "battery"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/example/playstore_scrapper_translator_example.py b/obsei_module/example/playstore_scrapper_translator_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..e89e09be4a17334368fcfe44039fa294728d020e
--- /dev/null
+++ b/obsei_module/example/playstore_scrapper_translator_example.py
@@ -0,0 +1,86 @@
"""Example pipeline: Play Store reviews -> Hindi-to-English translation -> classification."""
import json
import logging
import sys
from datetime import datetime, timedelta

import pytz

from obsei.payload import TextPayload
from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.analyzer.translation_analyzer import TranslationAnalyzer
from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.playstore_scrapper import (
    PlayStoreScrapperConfig,
    PlayStoreScrapperSource,
)


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
source = PlayStoreScrapperSource()


def source_fetch():
    """Fetch up to 5 US Play Store reviews posted during the last day."""
    # BUG FIX: datetime.utcnow() is naive and astimezone() interprets a naive
    # datetime as local time, skewing the window on non-UTC machines;
    # datetime.now(tz=...) gives an aware UTC timestamp directly.
    since_time = datetime.now(tz=pytz.utc) - timedelta(days=1)
    source_config = PlayStoreScrapperConfig(
        countries=["us"],
        package_name="com.color.apps.hindikeyboard.hindi.language",
        lookup_period=since_time.strftime(
            DATETIME_STRING_PATTERN
        ),  # todo should be optional
        max_count=5,
    )
    return source.lookup(source_config)


def translate_text(text_list):
    """Translate each payload's text from Hindi to English.

    Returns new TextPayload objects carrying the translated text.
    """
    translate_analyzer = TranslationAnalyzer(
        model_name_or_path="Helsinki-NLP/opus-mt-hi-en", device="auto"
    )
    source_responses = [
        TextPayload(processed_text=text.processed_text, source_name="sample")
        for text in text_list
    ]
    analyzer_responses = translate_analyzer.analyze_input(
        source_response_list=source_responses
    )
    return [
        TextPayload(
            processed_text=response.segmented_data["translated_text"],
            source_name="translator",
        )
        for response in analyzer_responses
    ]


def classify_text(text_list):
    """Zero-shot classify the (translated) payloads against fixed issue labels."""
    text_analyzer = ZeroShotClassificationAnalyzer(
        model_name_or_path="joeddav/bart-large-mnli-yahoo-answers", device="cpu"
    )

    return text_analyzer.analyze_input(
        source_response_list=text_list,
        analyzer_config=ClassificationAnalyzerConfig(
            labels=["no parking", "registration issue", "app issue", "payment issue"],
        ),
    )


def print_list(text_name, text_list):
    """Log every item of text_list as pretty-printed JSON, labelled text_name."""
    for idx, text in enumerate(text_list):
        json_response = json.dumps(text.__dict__, indent=4, sort_keys=True, default=str)
        logger.info(f"\n{text_name}#'{idx}'='{json_response}'")


logger.info("Started...")

source_responses_list = source_fetch()
translated_text_list = translate_text(source_responses_list)
analyzer_response_list = classify_text(translated_text_list)

print_list("source_response", source_responses_list)
print_list("translator_response", translated_text_list)
print_list("classifier_response", analyzer_response_list)
diff --git a/obsei_module/example/reddit_example.py b/obsei_module/example/reddit_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdf3a8d60c0058e8cdde32914d1b984d7cbc848f
--- /dev/null
+++ b/obsei_module/example/reddit_example.py
@@ -0,0 +1,50 @@
"""Example: poll a subreddit via the Reddit API, persisting lookup state in a workflow store."""
import logging
import sys
import time
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.reddit_source import RedditConfig, RedditSource
from obsei.workflow.store import WorkflowStore
from obsei.workflow.workflow import Workflow, WorkflowConfig


def print_state(id: str):
    """Log the persisted source state for the given workflow id."""
    logger.info(f"Source State: {source.store.get_source_state(id)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Fetch posts from the last 2 hours.
# BUG FIX: datetime.utcnow() is naive and astimezone() interprets a naive
# datetime as local time, skewing the window on non-UTC machines;
# datetime.now(tz=...) gives an aware UTC timestamp directly.
since_time = datetime.now(tz=pytz.utc) - timedelta(hours=2)
# Credentials will be fetched from env variable named reddit_client_id and reddit_client_secret
source_config = RedditConfig(
    subreddits=["wallstreetbets"],
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = RedditSource(store=WorkflowStore())

workflow = Workflow(
    config=WorkflowConfig(
        source_config=source_config,
    ),
)
source.store.add_workflow(workflow)


# Poll up to 3 times; each lookup resumes from the stored workflow state.
for i in range(1, 4):
    print_state(workflow.id)
    source_response_list = source.lookup(source_config, id=workflow.id)

    # Stop when there is nothing new to fetch.
    if not source_response_list:
        break

    for source_response in source_response_list:
        logger.info(source_response.__dict__)

    time.sleep(10)

print_state(workflow.id)
diff --git a/obsei_module/example/reddit_scrapper_example.py b/obsei_module/example/reddit_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..f306024440302f61ebf8f95f29adef98df0f8aaf
--- /dev/null
+++ b/obsei_module/example/reddit_scrapper_example.py
@@ -0,0 +1,30 @@
import logging
import sys
from datetime import datetime, timedelta

import pytz

from obsei.misc.utils import DATETIME_STRING_PATTERN
from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource


def print_state(workflow_id: str) -> None:
    """Log the source state persisted in the store for *workflow_id*."""
    logger.info(f"Source State: {source.store.get_source_state(workflow_id)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Look back one day from now.
# FIX: datetime.utcnow() returns a *naive* datetime, and .astimezone()
# interprets a naive datetime as *local* time, shifting the lookup window by
# the local UTC offset. datetime.now(pytz.utc) gives the intended aware UTC
# timestamp directly.
since_time = datetime.now(pytz.utc) - timedelta(days=1)

source_config = RedditScrapperConfig(
    url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
    user_agent="testscript by u/FitStatistician7378",
    lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
)

source = RedditScrapperSource()

source_response_list = source.lookup(source_config)
for source_response in source_response_list:
    logger.info(source_response.__dict__)
diff --git a/obsei_module/example/sdk.yaml b/obsei_module/example/sdk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89f5d55d3e42af1fc5fe1be71f2d92930342dde4
--- /dev/null
+++ b/obsei_module/example/sdk.yaml
@@ -0,0 +1,97 @@
+twitter_source:
+ _target_: obsei.source.twitter_source.TwitterSourceConfig
+ keywords:
+ - "@sample"
+ lookup_period: "1d"
+ tweet_fields:
+ - "author_id"
+ - "conversation_id"
+ - "created_at"
+ - "id"
+ - "public_metrics"
+ - "text"
+ user_fields:
+ - "id"
+ - "name"
+ - "public_metrics"
+ - "username"
+ - "verified"
+ expansions:
+ - "author_id"
+ place_fields: []
+ max_tweets: 10
+ credential:
+ _target_: obsei.source.twitter_source.TwitterCredentials
+ bearer_token: "bearer_token"
+
+play_store_source:
+ _target_: obsei.source.playstore_reviews.PlayStoreConfig
+ package_name: "com.company.package"
+ max_results: 10
+ num_retries: 2
+ cred_info:
+ _target_: obsei.source.playstore_reviews.GoogleCredInfo
+ service_cred_file: "foo/credential.json"
+ developer_key: "test_key"
+
+daily_get_sink:
+ _target_: obsei.sink.dailyget_sink.DailyGetSinkConfig
+ url: "http://localhost:8080/sample"
+ partner_id: "123456"
+ consumer_phone_number: "1234567890"
+ source_information: "Twitter @sample"
+ base_payload:
+ partnerId: daily_get_sink.partner_id
+ consumerPhoneNumber: daily_get_sink.consumer_phone_number
+
+http_sink:
+ _target_: obsei.sink.http_sink.HttpSinkConfig
+ url: "http://localhost:8080/sample"
+
+elasticsearch_sink:
+ _target_: obsei.sink.elasticsearch_sink.ElasticSearchSinkConfig
+ host: "localhost"
+ port: 9200
+ index_name: "test"
+
+jira_sink:
+ _target_: obsei.sink.jira_sink.JiraSinkConfig
+ url: "http://localhost:2990/jira"
+ username: "user"
+ password: "pass"
+ issue_type:
+ name: "Task"
+ project:
+ key: "CUS"
+
+analyzer_config:
+ _target_: obsei.analyzer.classification_analyzer.ClassificationAnalyzerConfig
+ labels:
+ - "service"
+ - "delay"
+ - "tracking"
+ - "no response"
+ add_positive_negative_labels: false
+
+analyzer:
+ _target_: obsei.analyzer.classification_analyzer.ZeroShotClassificationAnalyzer
+ model_name_or_path: "typeform/mobilebert-uncased-mnli"
+ device: "auto"
+
+slack_sink:
+ _target_: obsei.sink.SlackSink
+
+slack_sink_config:
+ _target_: obsei.sink.SlackSinkConfig
+ slack_token: 'Enter token'
+ channel_id: 'slack channel id'
+ jinja_template: |
+ ```
+ {%- for key, value in payload.items() recursive%}
+ {%- if value is mapping -%}
+ {{loop(value.items())}}
+ {%- else %}
+ {{key}}: {{value}}
+ {%- endif %}
+ {%- endfor%}
+ ```
diff --git a/obsei_module/example/slack_example.py b/obsei_module/example/slack_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d7d8a6d1730a7ef37217940eaafd5d96b40f2b0
--- /dev/null
+++ b/obsei_module/example/slack_example.py
@@ -0,0 +1,66 @@
import logging
import os
import sys

from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
from obsei.processor import Processor
from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
from obsei.source.playstore_scrapper import (PlayStoreScrapperConfig,
                                             PlayStoreScrapperSource)
from obsei.workflow.store import WorkflowStore
from obsei.workflow.workflow import Workflow, WorkflowConfig


def print_state(identifier: str) -> None:
    """Log the source state persisted in the store for the given workflow id."""
    logger.info(f"Source State: {source.store.get_source_state(identifier)}")


logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)


# Single store shared by source and sink so workflow state is persisted.
workflow_store = WorkflowStore()

# Scrape at most 3 Play Store reviews of the given app URL.
source_config = PlayStoreScrapperConfig(
    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
    max_count=3
)

source = PlayStoreScrapperSource(store=workflow_store)

# Slack token is read from the SLACK_TOKEN env variable (KeyError if unset).
# The Jinja template renders each payload as a fenced key/value dump.
sink_config = SlackSinkConfig(
    slack_token=os.environ["SLACK_TOKEN"],
    channel_id="C01TUPZ23NZ",
    jinja_template="""
```
    {%- for key, value in payload.items() recursive%}
    {%- if value is mapping -%}
{{loop(value.items())}}
    {%- else %}
{{key}}: {{value}}
    {%- endif %}
    {%- endfor%}
```
    """
)
sink = SlackSink(store=workflow_store)

# DummyAnalyzer — presumably a no-op passthrough analyzer; confirm in obsei docs.
analyzer_config = DummyAnalyzerConfig()
analyzer = DummyAnalyzer()

workflow = Workflow(
    config=WorkflowConfig(
        source_config=source_config,
        sink_config=sink_config,
        analyzer_config=analyzer_config,
    ),
)
workflow_store.add_workflow(workflow)

# Wire source -> analyzer -> sink and execute the workflow once.
processor = Processor(
    analyzer=analyzer, sink=sink, source=source, analyzer_config=analyzer_config
)

processor.process(workflow=workflow)

print_state(workflow.id)
diff --git a/obsei_module/example/twitter_source_example.py b/obsei_module/example/twitter_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc92b681dd4b2ea248162954c0ee2d2306b110f0
--- /dev/null
+++ b/obsei_module/example/twitter_source_example.py
@@ -0,0 +1,98 @@
import logging
import sys

from obsei.analyzer.classification_analyzer import ZeroShotClassificationAnalyzer, ClassificationAnalyzerConfig
from obsei.sink.slack_sink import SlackSinkConfig, SlackSink
from obsei.source.twitter_source import TwitterSourceConfig, TwitterSource, TwitterCredentials

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

twitter_cred_info = None

# Enter your twitter credentials
# Get it from https://developer.twitter.com/en/apply-for-access
# Currently it will fetch from environment variables: twitter_bearer_token, twitter_consumer_key, twitter_consumer_secret
# Uncomment below lines if you like to pass credentials directly instead of env variables

# twitter_cred_info = TwitterCredentials(
#     bearer_token='',
#     consumer_key="",
#     consumer_secret=""
# )

source_config = TwitterSourceConfig(
    query="bitcoin",
    lookup_period="1h",
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
    # FIX: `twitter_cred_info or None` was redundant — when twitter_cred_info
    # is None the expression already evaluates to None.
    cred_info=twitter_cred_info,
)

source = TwitterSource()


sink_config = SlackSinkConfig(
    # Uncomment below lines if you like to pass credentials directly instead of env variables
    # slack_token="SLACK_TOKEN",
    # channel_id="CHANNEL_ID",
    jinja_template="""
:bell: Hi there!, a new `<{{payload['meta']['tweet_url']}}|tweet>` of interest is found by *Obsei*
>📝 Content:
```{{payload['meta']['text']}}```
>ℹ️Information:
```
User Name: {{payload['meta']['author_info']['name']}} ({{payload['meta']['author_info']['user_url']}})
Tweet Metrics: Retweets={{payload['meta']['public_metrics']['retweet_count']}}, Likes={{payload['meta']['public_metrics']['like_count']}}
Author Metrics: Verified={{payload['meta']['author_info']['verified']}}, Followers={{payload['meta']['author_info']['public_metrics']['followers_count']}}
```
>🧠 AI Engine Data:
```
    {%- for key, value in payload['segmented_data']['classifier_data'].items() recursive%}
    {%- if value is mapping -%}
{{loop(value.items())}}
    {%- else %}
{{key}}: {{value}}
    {%- endif %}
    {%- endfor%}
```
    """
)
sink = SlackSink()

text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

analyzer_config = ClassificationAnalyzerConfig(
    labels=["going up", "going down"],
    add_positive_negative_labels=False,
)

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=analyzer_config,
)

for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")

# FIX: the original passed `id=id`, i.e. the *builtin* id() function, as the
# workflow identifier. No workflow store is used in this example, so no
# meaningful id exists — pass None instead.
sink_response_list = sink.send_data(
    analyzer_responses=analyzer_response_list, config=sink_config, id=None
)
# FIX: log label previously said "source_response" for sink responses (copy-paste).
for idx, sink_response in enumerate(sink_response_list):
    logger.info(f"sink_response#'{idx}'='{sink_response.__dict__}'")
diff --git a/obsei_module/example/web_crawler_example.py b/obsei_module/example/web_crawler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..51e5d68ba7ea902781110b1cdf327cba25630383
--- /dev/null
+++ b/obsei_module/example/web_crawler_example.py
@@ -0,0 +1,43 @@
+# Fetch full news article
+from obsei.source.website_crawler_source import (
+ TrafilaturaCrawlerConfig,
+ TrafilaturaCrawlerSource,
+)
+
+
def print_list(response_list):
    """Print the attribute dict of every response in *response_list*."""
    for item in response_list:
        print(vars(item))
+
+
# Single URL: extract the main text content of one page.
source_config = TrafilaturaCrawlerConfig(urls=["https://obsei.github.io/obsei/"])

source = TrafilaturaCrawlerSource()

source_response_list = source.lookup(source_config)
print_list(source_response_list)


# RSS feed (note: fetching and extracting every linked article can take a long time)
source_config = TrafilaturaCrawlerConfig(
    urls=["https://news.google.com/rss/search?q=bitcoin&hl=en&gl=US&ceid=US:en"],
    is_feed=True,
)

source = TrafilaturaCrawlerSource()

source_response_list = source.lookup(source_config)
print_list(source_response_list)


# Full website via its sitemap (note: crawling a whole site can take a long time)
source_config = TrafilaturaCrawlerConfig(
    urls=["https://haystack.deepset.ai/"],
    is_sitemap=True,
)

source = TrafilaturaCrawlerSource()

source_response_list = source.lookup(source_config)
print_list(source_response_list)
diff --git a/obsei_module/example/with_sdk_config_file.py b/obsei_module/example/with_sdk_config_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..37da8373fa032c2ceecab54de181ed9e9190ae04
--- /dev/null
+++ b/obsei_module/example/with_sdk_config_file.py
@@ -0,0 +1,28 @@
import logging
import sys

from obsei.configuration import ObseiConfiguration

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Load the YAML configuration (example/sdk.yaml); each top-level key there
# declares a component via a `_target_` class path.
obsei_configuration = ObseiConfiguration(
    config_path="../example",
    config_filename="sdk.yaml",
)

# Build component instances by their top-level key in the YAML file.
text_analyzer = obsei_configuration.initialize_instance("analyzer")
analyzer_config = obsei_configuration.initialize_instance("analyzer_config")
slack_source_config = obsei_configuration.initialize_instance("slack_sink_config")
slack_sink = obsei_configuration.initialize_instance("slack_sink")

play_store_source_config = obsei_configuration.initialize_instance("play_store_source")
twitter_source_config = obsei_configuration.initialize_instance("twitter_source")
http_sink_config = obsei_configuration.initialize_instance("http_sink")
daily_get_sink_config = obsei_configuration.initialize_instance("daily_get_sink")
# docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2
elasticsearch_sink_config = obsei_configuration.initialize_instance(
    "elasticsearch_sink"
)
# Start jira server locally `atlas-run-standalone --product jira`
jira_sink_config = obsei_configuration.initialize_instance("jira_sink")
diff --git a/obsei_module/example/with_state_example.py b/obsei_module/example/with_state_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..40ac7a0bc68a781907fd720861f035f557c2b67f
--- /dev/null
+++ b/obsei_module/example/with_state_example.py
@@ -0,0 +1,60 @@
import logging
import sys
import time

from obsei.workflow.store import WorkflowStore
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
from obsei.workflow.workflow import Workflow, WorkflowConfig

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Create workflow store instance, by default it will use SQLite to store state data
store = WorkflowStore()

# Pass store reference to observer, so it can use it to store state data
source = TwitterSource(store=store)


def print_state(id: str):
    """Log the source state persisted in the store for the given workflow id."""
    logger.info(f"Source State: {source.store.get_source_state(id)}")


source_config = TwitterSourceConfig(
    keywords=["india"],
    lookup_period="2m",
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)

# Create instance of workflow, adding observer config to it; it will auto-generate a unique workflow id
workflow = Workflow(
    config=WorkflowConfig(
        source_config=source_config,
    ),
)
# Insert workflow config to DB store
store.add_workflow(workflow)

for i in range(1, 4):
    print_state(workflow.id)
    # Now always pass workflow id to lookup function.
    # Observer will fetch old state from DB using this id and later store the
    # updated state back to DB against the same id.
    source_response_list = source.lookup(source_config, id=workflow.id)

    if source_response_list is None or len(source_response_list) == 0:
        break

    time.sleep(180)

print_state(workflow.id)
diff --git a/obsei_module/example/youtube_scrapper_example.py b/obsei_module/example/youtube_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bded3d4884a7f7523eea96e9275e648ebd9ad2f
--- /dev/null
+++ b/obsei_module/example/youtube_scrapper_example.py
@@ -0,0 +1,36 @@
import logging
import sys

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer)
from obsei.source.youtube_scrapper import (YoutubeScrapperConfig,
                                           YoutubeScrapperSource)

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Scrape up to 10 comments (including replies) of one video, looking back "1Y".
source_config = YoutubeScrapperConfig(
    video_url="https://www.youtube.com/watch?v=uZfns0JIlFk",
    fetch_replies=True,
    max_comments=10,
    lookup_period="1Y",
)

source = YoutubeScrapperSource()

source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")

# Zero-shot classifier; device="auto" presumably selects GPU when available —
# confirm against obsei documentation.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
)

# Classify each scraped comment against the candidate labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=["interesting", "enquiring"],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/images/Obsei-flow-diagram.png b/obsei_module/images/Obsei-flow-diagram.png
new file mode 100644
index 0000000000000000000000000000000000000000..e25e5fb52066f679c4ee7433cdcb59c19c4b4bd3
Binary files /dev/null and b/obsei_module/images/Obsei-flow-diagram.png differ
diff --git a/obsei_module/images/Obsei-future-concept.png b/obsei_module/images/Obsei-future-concept.png
new file mode 100644
index 0000000000000000000000000000000000000000..84d97eb97c5cc36b2d471789bad8edc23f2ad48d
Binary files /dev/null and b/obsei_module/images/Obsei-future-concept.png differ
diff --git a/obsei_module/images/jira_screenshot.png b/obsei_module/images/jira_screenshot.png
new file mode 100644
index 0000000000000000000000000000000000000000..d14e391487bdf46c5b279dd0f60c6de16302e901
Binary files /dev/null and b/obsei_module/images/jira_screenshot.png differ
diff --git a/obsei_module/images/logos/Slack_join.svg b/obsei_module/images/logos/Slack_join.svg
new file mode 100644
index 0000000000000000000000000000000000000000..1224dfdaba2b72dbfbc3f86a9e9a29aab4dc96b2
--- /dev/null
+++ b/obsei_module/images/logos/Slack_join.svg
@@ -0,0 +1,49 @@
+
+
+
diff --git a/obsei_module/images/logos/appstore.png b/obsei_module/images/logos/appstore.png
new file mode 100644
index 0000000000000000000000000000000000000000..a88786b5cfc6d42af039a0cf0ca15ef0f4ae378f
Binary files /dev/null and b/obsei_module/images/logos/appstore.png differ
diff --git a/obsei_module/images/logos/classification.png b/obsei_module/images/logos/classification.png
new file mode 100644
index 0000000000000000000000000000000000000000..c272cc21dd36d86e3e0fedb387fafe758af59bb1
Binary files /dev/null and b/obsei_module/images/logos/classification.png differ
diff --git a/obsei_module/images/logos/dummy.png b/obsei_module/images/logos/dummy.png
new file mode 100644
index 0000000000000000000000000000000000000000..65a21ae2882d0e26c88e1694425155a81e418507
Binary files /dev/null and b/obsei_module/images/logos/dummy.png differ
diff --git a/obsei_module/images/logos/elastic.png b/obsei_module/images/logos/elastic.png
new file mode 100644
index 0000000000000000000000000000000000000000..47e53a58326b6bf4d5bc8a9a48811c360311a7dd
Binary files /dev/null and b/obsei_module/images/logos/elastic.png differ
diff --git a/obsei_module/images/logos/facebook.png b/obsei_module/images/logos/facebook.png
new file mode 100644
index 0000000000000000000000000000000000000000..b4374d2b0528a0ec9f617a5c7ee33722e98a7d19
Binary files /dev/null and b/obsei_module/images/logos/facebook.png differ
diff --git a/obsei_module/images/logos/gmail.png b/obsei_module/images/logos/gmail.png
new file mode 100644
index 0000000000000000000000000000000000000000..357c439d8d8d7f884a3eeaaf1e1412d73ff5a72c
Binary files /dev/null and b/obsei_module/images/logos/gmail.png differ
diff --git a/obsei_module/images/logos/googlenews.png b/obsei_module/images/logos/googlenews.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7cf45acdb3bb1303a7e2b7ac8f2db9de11da558
Binary files /dev/null and b/obsei_module/images/logos/googlenews.png differ
diff --git a/obsei_module/images/logos/http_api.png b/obsei_module/images/logos/http_api.png
new file mode 100644
index 0000000000000000000000000000000000000000..8bc5c9ae4f47022088572a1753711bf5fd669948
Binary files /dev/null and b/obsei_module/images/logos/http_api.png differ
diff --git a/obsei_module/images/logos/jira.png b/obsei_module/images/logos/jira.png
new file mode 100644
index 0000000000000000000000000000000000000000..d92cf6f845594ace475c07c07cbbcfac0907a660
Binary files /dev/null and b/obsei_module/images/logos/jira.png differ
diff --git a/obsei_module/images/logos/logger.png b/obsei_module/images/logos/logger.png
new file mode 100644
index 0000000000000000000000000000000000000000..34a68ee5a5e73c2b9963ff9284482dcf4effe0f3
Binary files /dev/null and b/obsei_module/images/logos/logger.png differ
diff --git a/obsei_module/images/logos/ner.png b/obsei_module/images/logos/ner.png
new file mode 100644
index 0000000000000000000000000000000000000000..06297eabfb3d5d589943decf931e2ac096ee38d3
Binary files /dev/null and b/obsei_module/images/logos/ner.png differ
diff --git a/obsei_module/images/logos/obsei_200x200.png b/obsei_module/images/logos/obsei_200x200.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb4cb25373412834d3384c0308c8039667111876
Binary files /dev/null and b/obsei_module/images/logos/obsei_200x200.png differ
diff --git a/obsei_module/images/logos/pandas.svg b/obsei_module/images/logos/pandas.svg
new file mode 100644
index 0000000000000000000000000000000000000000..1451f57de198e7283f900a2538212c3ee27458f9
--- /dev/null
+++ b/obsei_module/images/logos/pandas.svg
@@ -0,0 +1,111 @@
+
+
diff --git a/obsei_module/images/logos/pii.png b/obsei_module/images/logos/pii.png
new file mode 100644
index 0000000000000000000000000000000000000000..13a6826f6f8aed02e7b1e89a2a9fac1ff3510481
Binary files /dev/null and b/obsei_module/images/logos/pii.png differ
diff --git a/obsei_module/images/logos/playstore.png b/obsei_module/images/logos/playstore.png
new file mode 100644
index 0000000000000000000000000000000000000000..c054cd04bb47e26fdba2d5b66071a63317182f36
Binary files /dev/null and b/obsei_module/images/logos/playstore.png differ
diff --git a/obsei_module/images/logos/reddit.png b/obsei_module/images/logos/reddit.png
new file mode 100644
index 0000000000000000000000000000000000000000..695eff14557b7ae25f594febd4cd562013fb9c5a
Binary files /dev/null and b/obsei_module/images/logos/reddit.png differ
diff --git a/obsei_module/images/logos/sentiment.png b/obsei_module/images/logos/sentiment.png
new file mode 100644
index 0000000000000000000000000000000000000000..632d49b471815a10ad16e6bdbe0db53a549b6076
Binary files /dev/null and b/obsei_module/images/logos/sentiment.png differ
diff --git a/obsei_module/images/logos/slack.svg b/obsei_module/images/logos/slack.svg
new file mode 100644
index 0000000000000000000000000000000000000000..c37dc5eb49e3ef638f9dd6f4cf9ab345db8c141d
--- /dev/null
+++ b/obsei_module/images/logos/slack.svg
@@ -0,0 +1,33 @@
+
+
+
diff --git a/obsei_module/images/logos/translator.png b/obsei_module/images/logos/translator.png
new file mode 100644
index 0000000000000000000000000000000000000000..2aa0174fa5f132d6474e051e9be503c105f03719
Binary files /dev/null and b/obsei_module/images/logos/translator.png differ
diff --git a/obsei_module/images/logos/twitter.png b/obsei_module/images/logos/twitter.png
new file mode 100644
index 0000000000000000000000000000000000000000..4e72f8cb100d33850ff0141e009afc30cd0c79b0
Binary files /dev/null and b/obsei_module/images/logos/twitter.png differ
diff --git a/obsei_module/images/logos/webcrawler.png b/obsei_module/images/logos/webcrawler.png
new file mode 100644
index 0000000000000000000000000000000000000000..0fd7f3623df18ed0493e08d526167e7cd1e1ee7e
Binary files /dev/null and b/obsei_module/images/logos/webcrawler.png differ
diff --git a/obsei_module/images/logos/zendesk.png b/obsei_module/images/logos/zendesk.png
new file mode 100644
index 0000000000000000000000000000000000000000..78bfb22e62f72e0a034b8e14a8964d9e5c185375
Binary files /dev/null and b/obsei_module/images/logos/zendesk.png differ
diff --git a/obsei_module/images/obsei-flyer.png b/obsei_module/images/obsei-flyer.png
new file mode 100644
index 0000000000000000000000000000000000000000..e94f831d50cceada70f07a5e1f6814201f7fa76b
Binary files /dev/null and b/obsei_module/images/obsei-flyer.png differ
diff --git a/obsei_module/images/obsei-ui-demo.png b/obsei_module/images/obsei-ui-demo.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0ce9bc92418c70a4e5b7210f7ab89024820dc17
Binary files /dev/null and b/obsei_module/images/obsei-ui-demo.png differ
diff --git a/obsei_module/images/obsei_flow.gif b/obsei_module/images/obsei_flow.gif
new file mode 100644
index 0000000000000000000000000000000000000000..a6538b34996b6092ce3c978b1daf8ff9ad4da683
--- /dev/null
+++ b/obsei_module/images/obsei_flow.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bb0b0b15bac52084145aea23f9b47b207853ce9c45d4c355ccadffadc129bb9
+size 6226733
diff --git a/obsei_module/mypy.ini b/obsei_module/mypy.ini
new file mode 100644
index 0000000000000000000000000000000000000000..976ba0294638950e865be3934cbeee3b6305ffd6
--- /dev/null
+++ b/obsei_module/mypy.ini
@@ -0,0 +1,2 @@
+[mypy]
+ignore_missing_imports = True
diff --git a/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/bug_report.md b/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000000000000000000000000000000000..c6915c4ae905cb402e1dc710b3daafb8f6360df4
--- /dev/null
+++ b/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,27 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: "[BUG]"
+labels: bug
+assignees: lalitpagaria
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Stacktrace**
+If applicable, add stacktrace to help explain your problem.
+
+**Please complete the following information:**
+ - OS:
+ - Version:
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/feature_request.md b/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000000000000000000000000000000000..11fc491ef1dae316f2b06bbb40eaba9c757fdfd1
--- /dev/null
+++ b/obsei_module/obsei-master/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: enhancement
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/obsei_module/obsei-master/.github/dependabot.yml b/obsei_module/obsei-master/.github/dependabot.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2c7d1708395e202b3b3316391f35bf4183ebd045
--- /dev/null
+++ b/obsei_module/obsei-master/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+ # Maintain dependencies for GitHub Actions
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "daily"
diff --git a/obsei_module/obsei-master/.github/release-drafter.yml b/obsei_module/obsei-master/.github/release-drafter.yml
new file mode 100644
index 0000000000000000000000000000000000000000..794187190e6f3fb290174970df09c18306b58a39
--- /dev/null
+++ b/obsei_module/obsei-master/.github/release-drafter.yml
@@ -0,0 +1,33 @@
+name-template: 'v$RESOLVED_VERSION 🌈'
+tag-template: 'v$RESOLVED_VERSION'
+categories:
+ - title: '🚀 Features'
+ labels:
+ - 'feature'
+ - 'enhancement'
+ - title: '🐛 Bug Fixes'
+ labels:
+ - 'fix'
+ - 'bugfix'
+ - 'bug'
+ - title: '🧰 Maintenance'
+ label: 'chore'
+ - title: '⚠️Breaking Changes'
+ label: 'breaking changes'
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+version-resolver:
+ major:
+ labels:
+ - 'major'
+ minor:
+ labels:
+ - 'minor'
+ patch:
+ labels:
+ - 'patch'
+ default: patch
+template: |
+ ## Changes
+
+ $CHANGES
\ No newline at end of file
diff --git a/obsei_module/obsei-master/.github/workflows/build.yml b/obsei_module/obsei-master/.github/workflows/build.yml
new file mode 100644
index 0000000000000000000000000000000000000000..767b04e369bceb740995187c9c3dfda5e3a90325
--- /dev/null
+++ b/obsei_module/obsei-master/.github/workflows/build.yml
@@ -0,0 +1,54 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: CI
+
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+jobs:
+ type-check:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+ - name: Test with mypy
+ run: |
+ pip install mypy
+ # Refer http://mypy-lang.blogspot.com/2021/06/mypy-0900-released.html
+ pip install mypy types-requests types-python-dateutil types-PyYAML types-dateparser types-protobuf types-pytz
+ mypy obsei
+
+ build-and-test:
+ needs: type-check
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ ubuntu-latest, macos-latest, windows-latest ]
+ python-version: ['3.8', '3.9', '3.10', '3.11']
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install '.[dev,all]'
+ pip install --upgrade --upgrade-strategy eager trafilatura
+ python -m spacy download en_core_web_lg
+ python -m spacy download en_core_web_sm
+
+ - name: Test with pytest
+ run: |
+ coverage run -m pytest
+ coverage report -m
diff --git a/obsei_module/obsei-master/.github/workflows/pypi_publish.yml b/obsei_module/obsei-master/.github/workflows/pypi_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..316334bb75c8e3fff0974b52fa85ffadcbb0b289
--- /dev/null
+++ b/obsei_module/obsei-master/.github/workflows/pypi_publish.yml
@@ -0,0 +1,35 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+ workflow_dispatch:
+ release:
+ types: [published]
+
+jobs:
+ deploy-pypi-artifact:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.8'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine hatch
+
+ - name: publish to PyPI
+ if: github.event_name != 'pull_request'
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ run: |
+ hatch build
+ twine upload dist/*
diff --git a/obsei_module/obsei-master/.github/workflows/release_draft.yml b/obsei_module/obsei-master/.github/workflows/release_draft.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2ed3737754610ea9c71896646975b34355580b4e
--- /dev/null
+++ b/obsei_module/obsei-master/.github/workflows/release_draft.yml
@@ -0,0 +1,15 @@
+name: release draft
+
+on:
+ workflow_dispatch:
+
+jobs:
+ draft-release:
+# if: startsWith(github.ref, 'refs/tags/')
+ runs-on: ubuntu-latest
+ steps:
+ - uses: release-drafter/release-drafter@v6
+ with:
+ config-name: release-drafter.yml
+ env:
+ GITHUB_TOKEN: ${{ secrets.RELEASE_DRAFT_TOKEN }}
\ No newline at end of file
diff --git a/obsei_module/obsei-master/.github/workflows/sdk_docker_publish.yml b/obsei_module/obsei-master/.github/workflows/sdk_docker_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..70daa5e698326d30b5d1cee9ba8a5e9213bda1b7
--- /dev/null
+++ b/obsei_module/obsei-master/.github/workflows/sdk_docker_publish.yml
@@ -0,0 +1,50 @@
+# This workflow builds and publishes the Obsei SDK Docker image to Docker Hub
+# For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-docker-images
+
+name: Publish SDK docker image
+
+on:
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: 'Image tag'
+ required: true
+ release:
+ types: [published]
+
+jobs:
+ deploy-sdk-docker:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Docker meta
+ id: docker_meta
+ uses: docker/metadata-action@v5
+ with:
+ images: obsei/obsei-sdk
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to DockerHub
+ if: github.event_name != 'pull_request'
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v5
+        with:
+          context: ./
+          file: ./Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.docker_meta.outputs.tags }}
+          labels: ${{ steps.docker_meta.outputs.labels }}
+
+ - name: Image digest
+ run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/obsei_module/obsei-master/.github/workflows/ui_docker_publish.yml b/obsei_module/obsei-master/.github/workflows/ui_docker_publish.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2fc690055b8d20c9462412352f2d75f8a6710447
--- /dev/null
+++ b/obsei_module/obsei-master/.github/workflows/ui_docker_publish.yml
@@ -0,0 +1,50 @@
+# This workflow builds and publishes the Obsei UI demo Docker image when a release is
+# published (or on manual dispatch). See: https://docs.docker.com/build/ci/github-actions/
+
+name: Publish UI Docker image
+
+on:
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: 'Image tag'
+ required: true
+ release:
+ types: [published]
+
+jobs:
+ deploy-ui-docker:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Docker meta
+ id: docker_meta
+ uses: docker/metadata-action@v5
+ with:
+ images: obsei/obsei-ui-demo
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Login to DockerHub
+ if: github.event_name != 'pull_request'
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v5
+        with:
+          context: "{{defaultContext}}:sample-ui"
+          file: Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.docker_meta.outputs.tags }}
+          labels: ${{ steps.docker_meta.outputs.labels }}
+      - name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/obsei_module/obsei-master/.gitignore b/obsei_module/obsei-master/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..80dd90d9cb4c179a40e922c4a9482c3afe64a999
--- /dev/null
+++ b/obsei_module/obsei-master/.gitignore
@@ -0,0 +1,148 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+/.idea/*
+*.db
+models*
+
+# OSX custom attributes
+.DS_Store
+
+# VS code configuration
+.vscode/*
diff --git a/obsei_module/obsei-master/.pre-commit-config.yaml b/obsei_module/obsei-master/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7631ed863745fe51f97e33d1b98b0aeb5ef43b70
--- /dev/null
+++ b/obsei_module/obsei-master/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.3.0
+ hooks:
+ - id: check-yaml
+ - id: trailing-whitespace
+ - id: requirements-txt-fixer
+ - id: end-of-file-fixer
+
+ - repo: https://github.com/psf/black
+ rev: 22.10.0
+ hooks:
+ - id: black
+
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v0.991
+ hooks:
+ - id: mypy
+ args: [--ignore-missing-imports]
+ additional_dependencies: [types-all]
+ files: ^obsei/
diff --git a/obsei_module/obsei-master/.pyup.yml b/obsei_module/obsei-master/.pyup.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b55ad548d5705a6c15d6f79192892e7612dbc2a3
--- /dev/null
+++ b/obsei_module/obsei-master/.pyup.yml
@@ -0,0 +1,5 @@
+# autogenerated pyup.io config file
+# see https://pyup.io/docs/configuration/ for all available options
+
+schedule: ''
+update: insecure
diff --git a/obsei_module/obsei-master/ATTRIBUTION.md b/obsei_module/obsei-master/ATTRIBUTION.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc6f436d7be74b3ca7d9bbcdcd7d823fb52f7a2e
--- /dev/null
+++ b/obsei_module/obsei-master/ATTRIBUTION.md
@@ -0,0 +1,18 @@
+This could not have been possible without following open source software -
+- [searchtweets-v2](https://github.com/twitterdev/search-tweets-python): For Twitter's API v2 wrapper
+- [vaderSentiment](https://github.com/cjhutto/vaderSentiment): For rule-based sentiment analysis
+- [transformers](https://github.com/huggingface/transformers): For text-classification pipeline
+- [atlassian-python-api](https://github.com/atlassian-api/atlassian-python-api): To interact with Jira
+- [elasticsearch](https://github.com/elastic/elasticsearch-py): To interact with Elasticsearch
+- [pydantic](https://github.com/samuelcolvin/pydantic): For data validation
+- [sqlalchemy](https://github.com/sqlalchemy/sqlalchemy): As SQL toolkit to access DB storage
+- [google-play-scraper](https://github.com/JoMingyu/google-play-scraper): To fetch the Google Play Store review without authentication
+- [praw](https://github.com/praw-dev/praw): For Reddit client
+- [reddit-rss-reader](https://github.com/lalitpagaria/reddit-rss-reader): For Reddit scraping
+- [app-store-reviews-reader](https://github.com/lalitpagaria/app_store_reviews_reader): For App Store reviews scraping
+- [slack-sdk](https://github.com/slackapi/python-slack-sdk): For slack integration
+- [presidio-anonymizer](https://github.com/microsoft/presidio): Personal information anonymizer
+- [GoogleNews](https://github.com/Iceloof/GoogleNews): For Google News integration
+- [python-facebook-api](https://github.com/sns-sdks/python-facebook): For facebook integration
+- [youtube-comment-downloader](https://github.com/egbertbouman/youtube-comment-downloader): For Youtube video comments extraction code
+- [dateparser](https://github.com/scrapinghub/dateparser): To parse date properly (where format is ambiguous)
\ No newline at end of file
diff --git a/obsei_module/obsei-master/CITATION.cff b/obsei_module/obsei-master/CITATION.cff
new file mode 100644
index 0000000000000000000000000000000000000000..bd12a46b8dc05be975f138e2357ebef65de9ade3
--- /dev/null
+++ b/obsei_module/obsei-master/CITATION.cff
@@ -0,0 +1,14 @@
+# YAML 1.2
+---
+authors:
+ -
+ family-names: Pagaria
+ given-names: Lalit
+
+cff-version: "1.1.0"
+license: "Apache-2.0"
+message: "If you use this software, please cite it using this metadata."
+repository-code: "https://github.com/obsei/obsei"
+title: "Obsei - a low code AI powered automation tool"
+version: "0.0.10"
+...
diff --git a/obsei_module/obsei-master/CNAME b/obsei_module/obsei-master/CNAME
new file mode 100644
index 0000000000000000000000000000000000000000..48c4fb7ad825704db946a83e64693071ebe454d7
--- /dev/null
+++ b/obsei_module/obsei-master/CNAME
@@ -0,0 +1 @@
+www.obsei.com
\ No newline at end of file
diff --git a/obsei_module/obsei-master/CODE_OF_CONDUCT.md b/obsei_module/obsei-master/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000000000000000000000000000000000..e8c5ad02324a0fa0778f625fd77f183f3c531ff7
--- /dev/null
+++ b/obsei_module/obsei-master/CODE_OF_CONDUCT.md
@@ -0,0 +1,128 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the
+ overall community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or
+ advances of any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email
+ address, without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+obsei.tool@gmail.com
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or
+permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.0, available at
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct
+enforcement ladder](https://github.com/mozilla/diversity).
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see the FAQ at
+https://www.contributor-covenant.org/faq. Translations are available at
+https://www.contributor-covenant.org/translations.
diff --git a/obsei_module/obsei-master/CONTRIBUTING.md b/obsei_module/obsei-master/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..f0afbb0a9ac896f8afb37430e389450efe498926
--- /dev/null
+++ b/obsei_module/obsei-master/CONTRIBUTING.md
@@ -0,0 +1,103 @@
+# 👐 Contributing to Obsei
+
+First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
+Community contributions are what keep projects like this fueled and constantly improving, so a big thanks to you!
+
+Below are some sections detailing the guidelines we'd like you to follow to make your contribution as seamless as possible.
+
+- [Code of Conduct](#coc)
+- [Asking a Question and Discussions](#question)
+- [Issues, Bugs, and Feature Requests](#issue)
+- [Submission Guidelines](#submit)
+- [Code Style and Formatting](#code)
+- [Contributor License Agreement](#cla)
+
+## 📜 Code of Conduct
+
+The [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md) applies within all community spaces.
+If you are not familiar with our Code of Conduct policy, take a minute to read the policy before starting with your first contribution.
+
+## 🗣️ Query or Discussion
+
+We would like to use [Github discussions](https://github.com/obsei/obsei/discussions) as the central hub for all
+community discussions, questions, and everything else in between. While Github discussions is a new service (as of 2021)
+we believe that it really helps keep this repo as one single source to find all relevant information. Our hope is that
+discussion page functions as a record of all the conversations that help contribute to the project's development.
+
+If you are new to [Github discussions](https://github.com/obsei/obsei/discussions) it is a very similar experience
+to Stack Overflow with an added element of general discussion and discourse rather than solely being question and answer based.
+
+## 🪲 Issues, Bugs, and Feature Requests
+
+We are very open to community contributions and appreciate anything that improves **Obsei**. This includes fixing typos, adding missing documentation, fixing bugs or adding new features.
+To avoid unnecessary work on either side, please stick to the following process:
+
+1. If you feel like your issue is not specific and more of a general question about a design decision, or algorithm implementation maybe start a [discussion](https://github.com/obsei/obsei/discussions) instead, this helps keep the issues less cluttered and encourages more open-ended conversation.
+2. Check if there is already [a related issue](https://github.com/obsei/obsei/issues).
+3. If there is not, open a new one to start a discussion. Some features might be a nice idea, but don't fit in the scope of Obsei and we hate to close finished PRs.
+4. If we came to the conclusion to move forward with your issue, we will be happy to accept a pull request. Make sure you create a pull request in an early draft version and ask for feedback.
+5. Verify that all tests in the CI pass (and add new ones if you implement anything new)
+
+See [below](#submit) for some guidelines.
+
+## ✉️ Submission Guidelines
+
+### Submitting an Issue
+
+Before you submit your issue search the archive, maybe your question was already answered.
+
+If your issue appears to be a bug, and hasn't been reported, open a new issue.
+Help us to maximize the effort we can spend fixing issues and adding new
+features, by not reporting duplicate issues. Providing the following information will increase the
+chances of your issue being dealt with quickly:
+
+- **Describe the bug** - A clear and concise description of what the bug is.
+- **To Reproduce**- Steps to reproduce the behavior.
+- **Expected behavior** - A clear and concise description of what you expected to happen.
+- **Environment**
+ - Obsei version
+ - Python version
+ - OS
+- **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
+ causing the problem (line of code or commit)
+
+When you submit a PR you will be presented with a PR template, please fill this in as best you can.
+
+### Submitting a Pull Request
+
+Before you submit your pull request consider the following guidelines:
+
+- Search [GitHub](https://github.com/obsei/obsei/pulls) for an open or closed Pull Request
+ that relates to your submission. You don't want to duplicate effort.
+- Fork the main repo if not already done
+- Rebase fork with `upstream master`
+- Create new branch and add the changes in that branch
+- Add supporting test cases
+- Follow our [Coding Rules](#rules).
+- Avoid checking in files that shouldn't be tracked (e.g `dist`, `build`, `.tmp`, `.idea`).
+ We recommend using a [global](#global-gitignore) gitignore for this.
+- Before you commit please run the test suite and make sure all tests are passing.
+- Format your code appropriately:
+ - This package uses [black](https://black.readthedocs.io/en/stable/) as its formatter.
+ In order to format your code with black run `black . ` from the root of the package.
+- Run `pre-commit run --all-files` if you're adding new hooks to pre-commit config file. By default, pre-commit will run on modified files when committing changes.
+- Commit your changes using a descriptive commit message.
+- In GitHub, send a pull request to `obsei:master`.
+- If we suggest changes then:
+ - Make the required updates.
+ - Rebase your branch and force push to your GitHub repository (this will update your Pull Request):
+
+That's it! Thank you for your contribution!
+
+## ✅ Coding Rules
+
+We generally follow the [Google Python style guide](http://google.github.io/styleguide/pyguide.html).
+
+## 📝 Contributor License Agreement
+
+To ensure we do not run into any legal problems later, it is sadly necessary to sign a [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md). That can be done literally with the push of a button.
+
+---
+
+_This guide was inspired by the [transformers-interpret](https://github.com/cdpierse/transformers-interpret/blob/master/CONTRIBUTING.md),
+[Haystack](https://github.com/deepset-ai/haystack/blob/master/CONTRIBUTING.md) and [n8n](https://github.com/n8n-io/n8n/blob/master/CONTRIBUTOR_LICENSE_AGREEMENT.md)_
diff --git a/obsei_module/obsei-master/CONTRIBUTOR_LICENSE_AGREEMENT.md b/obsei_module/obsei-master/CONTRIBUTOR_LICENSE_AGREEMENT.md
new file mode 100644
index 0000000000000000000000000000000000000000..8b4784f57938ed30cbc0de319b9c90df121b3632
--- /dev/null
+++ b/obsei_module/obsei-master/CONTRIBUTOR_LICENSE_AGREEMENT.md
@@ -0,0 +1,3 @@
+# Obsei Contributor License Agreement
+
+I give Obsei's Creator permission to license my contributions to any terms they like. I am giving them this license in order to make it possible for them to accept my contributions into their project.
\ No newline at end of file
diff --git a/obsei_module/obsei-master/Dockerfile b/obsei_module/obsei-master/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..b16cbfd7580a3c384e93b690c80a8e4812d1a57f
--- /dev/null
+++ b/obsei_module/obsei-master/Dockerfile
@@ -0,0 +1,38 @@
+# This is Docker file to Obsei SDK with dependencies installed
+FROM python:3.10-slim-bullseye
+
+RUN useradd --create-home user
+WORKDIR /home/user
+
+# env variable
+ENV PIP_DISABLE_PIP_VERSION_CHECK 1
+ENV PIP_NO_CACHE_DIR 1
+ENV WORKFLOW_SCRIPT '/home/user/obsei/process_workflow.py'
+ENV OBSEI_CONFIG_PATH ""
+ENV OBSEI_CONFIG_FILENAME ""
+
+
+# Hack to install jre on debian
+RUN mkdir -p /usr/share/man/man1
+
+# install few required tools
+RUN apt-get update && apt-get install -y --no-install-recommends curl git pkg-config cmake libncurses5 g++ \
+ && apt-get clean autoclean && apt-get autoremove -y \
+ && rm -rf /var/lib/{apt,dpkg,cache,log}/
+
+# install as a package
+COPY pyproject.toml README.md /home/user/
+RUN pip install --upgrade pip
+
+# copy README
+COPY README.md /home/user/
+
+# copy code
+COPY obsei /home/user/obsei
+RUN pip install -e .[all]
+
+
+USER user
+
+# cmd for running the API
+CMD ["sh", "-c", "python ${WORKFLOW_SCRIPT}"]
diff --git a/obsei_module/obsei-master/LICENSE b/obsei_module/obsei-master/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..fecb6d71f505d183b3e4f5bbda806637c660d0f1
--- /dev/null
+++ b/obsei_module/obsei-master/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2020-2022 Oraika Technologies Private Limited (https://www.oraika.com)
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/obsei_module/obsei-master/MANIFEST.in b/obsei_module/obsei-master/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..84c71247ce333d3b19e1265f4da3fd130972bc35
--- /dev/null
+++ b/obsei_module/obsei-master/MANIFEST.in
@@ -0,0 +1,3 @@
+include LICENSE
+include requirements.txt
+include README.md
diff --git a/obsei_module/obsei-master/README.md b/obsei_module/obsei-master/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..48c602f538183d2bba5f019e2c81cb32946cb71b
--- /dev/null
+++ b/obsei_module/obsei-master/README.md
@@ -0,0 +1,1067 @@
+
+
+---
+
+![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/obsei_flow.gif)
+
+---
+
+
+Note: Obsei is still in alpha stage, so use it carefully in production. Also, as it is constantly under development, the master branch may contain many breaking changes. Please use a released version.
+
+
+---
+
+**Obsei** (pronounced "Ob see" | /əb-'sē/) is an open-source, low-code, AI powered automation tool. _Obsei_ consists of -
+
+- **Observer**: Collect unstructured data from various sources like tweets from Twitter, Subreddit comments on Reddit, page post's comments from Facebook, App Stores reviews, Google reviews, Amazon reviews, News, Website, etc.
+- **Analyzer**: Analyze unstructured data collected with various AI tasks like classification, sentiment analysis, translation, PII, etc.
+- **Informer**: Send analyzed data to various destinations like ticketing platforms, data storage, dataframe, etc so that the user can take further actions and perform analysis on the data.
+
+All the Observers can store their state in databases (Sqlite, Postgres, MySQL, etc.), making Obsei suitable for scheduled jobs or serverless applications.
+
+![Obsei diagram](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/Obsei_diagram.png)
+
+### Future direction -
+
+- Text, Image, Audio, Documents and Video oriented workflows
+- Collect data from every possible private and public channels
+- Add every possible workflow to an AI downstream application to automate manual cognitive workflows
+
+## Use cases
+
+_Obsei_ use cases are following, but not limited to -
+
+- Social listening: Listening about social media posts, comments, customer feedback, etc.
+- Alerting/Notification: To get auto-alerts for events such as customer complaints, qualified sales leads, etc.
+- Automatic customer issue creation based on customer complaints on Social Media, Email, etc.
+- Automatic assignment of proper tags to tickets based on the content of the customer complaint, for example login issue, sign up issue, delivery issue, etc.
+- Extraction of deeper insight from feedbacks on various platforms
+- Market research
+- Creation of dataset for various AI tasks
+- Many more based on creativity 💡
+
+## Installation
+
+### Prerequisite
+
+Install the following (if not present already) -
+
+- Install [Python 3.7+](https://www.python.org/downloads/)
+- Install [PIP](https://pip.pypa.io/en/stable/installing/)
+
+### Install Obsei
+
+You can install Obsei either via PIP or Conda based on your preference.
+To install latest released version -
+
+```shell
+pip install obsei[all]
+```
+
+Install from master branch (if you want to try the latest features) -
+
+```shell
+git clone https://github.com/obsei/obsei.git
+cd obsei
+pip install --editable .[all]
+```
+
+Note: `all` option will install all the dependencies which might not be needed for your workflow, alternatively
+following options are available to install minimal dependencies as per need -
+ - `pip install obsei[source]`: To install dependencies related to all observers
+ - `pip install obsei[sink]`: To install dependencies related to all informers
+ - `pip install obsei[analyzer]`: To install dependencies related to all analyzers, it will install pytorch as well
+ - `pip install obsei[twitter-api]`: To install dependencies related to Twitter observer
+ - `pip install obsei[google-play-scraper]`: To install dependencies related to Play Store review scrapper observer
+ - `pip install obsei[google-play-api]`: To install dependencies related to Google official play store review API based observer
+ - `pip install obsei[app-store-scraper]`: To install dependencies related to Apple App Store review scrapper observer
+ - `pip install obsei[reddit-scraper]`: To install dependencies related to Reddit post and comment scrapper observer
+ - `pip install obsei[reddit-api]`: To install dependencies related to Reddit official api based observer
+ - `pip install obsei[pandas]`: To install dependencies related to TSV/CSV/Pandas based observer and informer
+ - `pip install obsei[google-news-scraper]`: To install dependencies related to Google news scrapper observer
+ - `pip install obsei[facebook-api]`: To install dependencies related to Facebook official page post and comments api based observer
+ - `pip install obsei[atlassian-api]`: To install dependencies related to Jira official api based informer
+ - `pip install obsei[elasticsearch]`: To install dependencies related to elasticsearch informer
+ - `pip install obsei[slack-api]`: To install dependencies related to Slack official api based informer
+
+You can also mix multiple dependencies together in single installation command. For example to install dependencies
+Twitter observer, all analyzer, and Slack informer use following command -
+```shell
+pip install obsei[twitter-api, analyzer, slack-api]
+```
+
+
+## How to use
+
+Expand the following steps and create a workflow -
+
+Step 1: Configure Source/Observer
+
+
+
Twitter
+
+```python
+from obsei.source.twitter_source import TwitterCredentials, TwitterSource, TwitterSourceConfig
+
+# initialize twitter source config
+source_config = TwitterSourceConfig(
+ keywords=["issue"], # Keywords, @user or #hashtags
+ lookup_period="1h", # Lookup period from current time, format: `` (day|hour|minute)
+ cred_info=TwitterCredentials(
+ # Enter your twitter consumer key and secret. Get it from https://developer.twitter.com/en/apply-for-access
+ consumer_key="",
+ consumer_secret="",
+ bearer_token='',
+ )
+)
+
+# initialize tweets retriever
+source = TwitterSource()
+```
+
+
+
+
+
+
Youtube Scrapper
+
+```python
+from obsei.source.youtube_scrapper import YoutubeScrapperSource, YoutubeScrapperConfig
+
+# initialize Youtube source config
+source_config = YoutubeScrapperConfig(
+ video_url="https://www.youtube.com/watch?v=uZfns0JIlFk", # Youtube video URL
+ fetch_replies=True, # Fetch replies to comments
+ max_comments=10, # Total number of comments and replies to fetch
+ lookup_period="1Y", # Lookup period from current time, format: `` (day|hour|minute|month|year)
+)
+
+# initialize Youtube comments retriever
+source = YoutubeScrapperSource()
+```
+
+
+
+
+
+
Facebook
+
+```python
+from obsei.source.facebook_source import FacebookCredentials, FacebookSource, FacebookSourceConfig
+
+# initialize facebook source config
+source_config = FacebookSourceConfig(
+ page_id="110844591144719", # Facebook page id, for example this one for Obsei
+ lookup_period="1h", # Lookup period from current time, format: `` (day|hour|minute)
+ cred_info=FacebookCredentials(
+ # Enter your facebook app_id, app_secret and long_term_token. Get it from https://developers.facebook.com/apps/
+ app_id="",
+ app_secret="",
+ long_term_token="",
+ )
+)
+
+# initialize facebook post comments retriever
+source = FacebookSource()
+```
+
+
+
+
+
+
Email
+
+```python
+from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource
+
+# initialize email source config
+source_config = EmailConfig(
+ # List of IMAP servers for most commonly used email providers
+ # https://www.systoolsgroup.com/imap/
+ # Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
+ # https://myaccount.google.com/lesssecureapps?pli=1
+ # Also enable IMAP access -
+ # https://mail.google.com/mail/u/0/#settings/fwdandpop
+ imap_server="imap.gmail.com", # Enter IMAP server
+ cred_info=EmailCredInfo(
+ # Enter your email account username and password
+ username="",
+ password=""
+ ),
+ lookup_period="1h" # Lookup period from current time, format: `` (day|hour|minute)
+)
+
+# initialize email retriever
+source = EmailSource()
+```
+
+
+
+
+
+
Google Maps Reviews Scrapper
+
+```python
+from obsei.source.google_maps_reviews import OSGoogleMapsReviewsSource, OSGoogleMapsReviewsConfig
+
+# initialize Outscrapper Maps review source config
+source_config = OSGoogleMapsReviewsConfig(
+ # Collect API key from https://outscraper.com/
+ api_key="",
+ # Enter Google Maps link or place id
+ # For example below is for the "Taj Mahal"
+ queries=["https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"],
+ number_of_reviews=10,
+)
+
+
+# initialize Outscrapper Maps review retriever
+source = OSGoogleMapsReviewsSource()
+```
+
+
+
+
+
+
AppStore Reviews Scrapper
+
+```python
+from obsei.source.appstore_scrapper import AppStoreScrapperConfig, AppStoreScrapperSource
+
+# initialize app store source config
+source_config = AppStoreScrapperConfig(
+ # Need two parameters app_id and country.
+ # `app_id` can be found at the end of the url of app in app store.
+ # For example - https://apps.apple.com/us/app/xcode/id497799835
+ # `310633997` is the app_id for xcode and `us` is country.
+ countries=["us"],
+ app_id="310633997",
+ lookup_period="1h" # Lookup period from current time, format: `` (day|hour|minute)
+)
+
+
+# initialize app store reviews retriever
+source = AppStoreScrapperSource()
+```
+
+
+
+
+
+
Play Store Reviews Scrapper
+
+```python
+from obsei.source.playstore_scrapper import PlayStoreScrapperConfig, PlayStoreScrapperSource
+
+# initialize play store source config
+source_config = PlayStoreScrapperConfig(
+ # Need two parameters package_name and country.
+ # `package_name` can be found at the end of the url of app in play store.
+ # For example - https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en&gl=US
+ # `com.google.android.gm` is the package_name for Gmail and `us` is country.
+ countries=["us"],
+ package_name="com.google.android.gm",
+ lookup_period="1h" # Lookup period from current time, format: `` (day|hour|minute)
+)
+
+# initialize play store reviews retriever
+source = PlayStoreScrapperSource()
+```
+
+
+
+
+
+
Reddit
+
+```python
+from obsei.source.reddit_source import RedditConfig, RedditSource, RedditCredInfo
+
+# initialize reddit source config
+source_config = RedditConfig(
+ subreddits=["wallstreetbets"], # List of subreddits
+ # Reddit account username and password
+ # You can also enter reddit client_id and client_secret or refresh_token
+ # Create credential at https://www.reddit.com/prefs/apps
+ # Also refer https://praw.readthedocs.io/en/latest/getting_started/authentication.html
+ # Currently Password Flow, Read Only Mode and Saved Refresh Token Mode are supported
+ cred_info=RedditCredInfo(
+ username="",
+ password=""
+ ),
+ lookup_period="1h" # Lookup period from current time, format: `` (day|hour|minute)
+)
+
+# initialize reddit retriever
+source = RedditSource()
+```
+
+
+
+
+
+
Reddit Scrapper
+
+Note: Reddit heavily rate limits scrapers, hence use it to fetch a small amount of data over a long period
+
+```python
+from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource
+
+# initialize reddit scrapper source config
+source_config = RedditScrapperConfig(
+ # Reddit subreddit, search etc rss url. For proper url refer following link -
+ # Refer https://www.reddit.com/r/pathogendavid/comments/tv8m9/pathogendavids_guide_to_rss_and_reddit/
+ url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
+ lookup_period="1h" # Lookup period from current time, format: `` (day|hour|minute)
+)
+
+# initialize reddit retriever
+source = RedditScrapperSource()
+```
+
+
+
+
+
+
Google News
+
+```python
+from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource
+
+# initialize Google News source config
+source_config = GoogleNewsConfig(
+ query='bitcoin',
+ max_results=5,
+ # To fetch full article text enable `fetch_article` flag
+ # By default google news gives title and highlight
+ fetch_article=True,
+ # proxy='http://127.0.0.1:8080'
+)
+
+# initialize Google News retriever
+source = GoogleNewsSource()
+```
+
+
+
Pandas DataFrame
+
+```python
+import pandas as pd
+from obsei.source.pandas_source import PandasSource, PandasSourceConfig
+
+# Initialize your Pandas DataFrame from your sources like csv, excel, sql etc
+# In following example we are reading csv which have two columns title and text
+csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
+dataframe = pd.read_csv(csv_file)
+
+# initialize pandas sink config
+sink_config = PandasSourceConfig(
+ dataframe=dataframe,
+ include_columns=["score"],
+ text_columns=["name", "degree"],
+)
+
+# initialize pandas sink
+sink = PandasSource()
+```
+
+
+
+
+
+
+
+
+
+Step 2: Configure Analyzer
+
+Note: To run transformers in an offline mode, check [transformers offline mode](https://huggingface.co/transformers/installation.html#offline-mode).
+
+
Some analyzer support GPU and to utilize pass device parameter.
+List of possible values of device parameter (default value auto):
+
+
auto: GPU (cuda:0) will be used if available otherwise CPU will be used
+
cpu: CPU will be used
+
cuda:{id} - GPU will be used with provided CUDA device id
+
+
+
+
+
Text Classification
+
+Text classification: Classify text into user provided categories.
+
+```python
+from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer
+
+# initialize classification analyzer config
+# It can also detect sentiments if "positive" and "negative" labels are added.
+analyzer_config=ClassificationAnalyzerConfig(
+ labels=["service", "delay", "performance"],
+)
+
+# initialize classification analyzer
+# For supported models refer https://huggingface.co/models?filter=zero-shot-classification
+text_analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="typeform/mobilebert-uncased-mnli",
+ device="auto"
+)
+```
+
+
+
+
+
+
Sentiment Analyzer
+
+Sentiment Analyzer: Detect the sentiment of the text. Text classification can also perform sentiment analysis but if you don't want to use heavy-duty NLP model then use less resource hungry dictionary based Vader Sentiment detector.
+
+```python
+from obsei.analyzer.sentiment_analyzer import VaderSentimentAnalyzer
+
+# Vader does not need any configuration settings
+analyzer_config=None
+
+# initialize vader sentiment analyzer
+text_analyzer = VaderSentimentAnalyzer()
+```
+
+
+
+
+
+
NER Analyzer
+
+NER (Named-Entity Recognition) Analyzer: Extract information and classify named entities mentioned in text into pre-defined categories such as person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc
+
+```python
+from obsei.analyzer.ner_analyzer import NERAnalyzer
+
+# NER analyzer does not need configuration settings
+analyzer_config=None
+
+# initialize ner analyzer
+# For supported models refer https://huggingface.co/models?filter=token-classification
+text_analyzer = NERAnalyzer(
+ model_name_or_path="elastic/distilbert-base-cased-finetuned-conll03-english",
+ device = "auto"
+)
+```
+
+
+
PII Anonymizer
+
+```python
+from obsei.analyzer.pii_analyzer import PresidioEngineConfig, PresidioModelConfig, \
+ PresidioPIIAnalyzer, PresidioPIIAnalyzerConfig
+
+# initialize pii analyzer's config
+analyzer_config = PresidioPIIAnalyzerConfig(
+ # Whether to return only pii analysis or anonymize text
+ analyze_only=False,
+ # Whether to return detail information about anonymization decision
+ return_decision_process=True
+)
+
+# initialize pii analyzer
+analyzer = PresidioPIIAnalyzer(
+ engine_config=PresidioEngineConfig(
+ # spacy and stanza nlp engines are supported
+ # For more info refer
+ # https://microsoft.github.io/presidio/analyzer/developing_recognizers/#utilize-spacy-or-stanza
+ nlp_engine_name="spacy",
+ # Update desired spacy model and language
+ models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")]
+ )
+)
+```
+
+
+
+
+
+
Dummy Analyzer
+
+Dummy Analyzer: Does nothing. Its simply used for transforming the input (TextPayload) to output (TextPayload) and adding the user supplied dummy data.
+
+```python
+from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
+
+# initialize dummy analyzer's configuration settings
+analyzer_config = DummyAnalyzerConfig()
+
+# initialize dummy analyzer
+analyzer = DummyAnalyzer()
+```
+
+
+
+
+
+
+
+
+
+Step 3: Configure Sink/Informer
+
+
+
Slack
+
+```python
+from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
+
+# initialize slack sink config
+sink_config = SlackSinkConfig(
+ # Provide slack bot/app token
+ # For more detail refer https://slack.com/intl/en-de/help/articles/215770388-Create-and-regenerate-API-tokens
+ slack_token="",
+ # To get channel id refer https://stackoverflow.com/questions/40940327/what-is-the-simplest-way-to-find-a-slack-team-id-and-a-channel-id
+ channel_id="C01LRS6CT9Q"
+)
+
+# initialize slack sink
+sink = SlackSink()
+```
+
+
+
+
+
+
Zendesk
+
+```python
+from obsei.sink.zendesk_sink import ZendeskSink, ZendeskSinkConfig, ZendeskCredInfo
+
+# initialize zendesk sink config
+sink_config = ZendeskSinkConfig(
+ # provide zendesk domain
+ domain="zendesk.com",
+ # provide subdomain if you have one
+ subdomain=None,
+ # Enter zendesk user details
+ cred_info=ZendeskCredInfo(
+ email="",
+ password=""
+ )
+)
+
+# initialize zendesk sink
+sink = ZendeskSink()
+```
+
+
+
+
+
+
Jira
+
+```python
+from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
+
+# For testing purpose you can start jira server locally
+# Refer https://developer.atlassian.com/server/framework/atlassian-sdk/atlas-run-standalone/
+
+# initialize Jira sink config
+sink_config = JiraSinkConfig(
+ url="http://localhost:2990/jira", # Jira server url
+ # Jira username & password for user who have permission to create issue
+ username="",
+ password="",
+ # Which type of issue to be created
+ # For more information refer https://support.atlassian.com/jira-cloud-administration/docs/what-are-issue-types/
+ issue_type={"name": "Task"},
+ # Under which project issue to be created
+ # For more information refer https://support.atlassian.com/jira-software-cloud/docs/what-is-a-jira-software-project/
+ project={"key": "CUS"},
+)
+
+# initialize Jira sink
+sink = JiraSink()
+```
+
+
+
+
+
+
ElasticSearch
+
+```python
+from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
+
+# For testing purpose you can start Elasticsearch server locally via docker
+# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:8.5.0`
+
+# initialize Elasticsearch sink config
+sink_config = ElasticSearchSinkConfig(
+ # Elasticsearch server
+ hosts="http://localhost:9200",
+ # Index name, it will create if not exist
+ index_name="test",
+)
+
+# initialize Elasticsearch sink
+sink = ElasticSearchSink()
+```
+
+
+
+
+
+
Http
+
+```python
+from obsei.sink.http_sink import HttpSink, HttpSinkConfig
+
+# For testing purpose you can create mock http server via postman
+# For more details refer https://learning.postman.com/docs/designing-and-developing-your-api/mocking-data/setting-up-mock/
+
+# initialize http sink config (Currently only POST call is supported)
+sink_config = HttpSinkConfig(
+ # provide http server url
+ url="https://localhost:8080/api/path",
+ # Here you can add headers you would like to pass with request
+ headers={
+ "Content-type": "application/json"
+ }
+)
+
+# To modify or converting the payload, create convertor class
+# Refer obsei.sink.dailyget_sink.PayloadConvertor for example
+
+# initialize http sink
+sink = HttpSink()
+```
+
+
+
+
+
+
+Step 4: Join and create workflow
+
+`source` will fetch data from the selected source, then feed it to the `analyzer` for processing, whose output we feed into a `sink` to get notified at that sink.
+
+```python
+# Uncomment if you want logger
+# import logging
+# import sys
+# logger = logging.getLogger(__name__)
+# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# This will fetch information from configured source ie twitter, app store etc
+source_response_list = source.lookup(source_config)
+
+# Uncomment if you want to log source response
+# for idx, source_response in enumerate(source_response_list):
+# logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+# This will execute analyzer (Sentiment, classification etc) on source data with provided analyzer_config
+analyzer_response_list = text_analyzer.analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=analyzer_config
+)
+
+# Uncomment if you want to log analyzer response
+# for idx, an_response in enumerate(analyzer_response_list):
+# logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+# Analyzer output added to segmented_data
+# Uncomment to log it
+# for idx, an_response in enumerate(analyzer_response_list):
+# logger.info(f"analyzed_data#'{idx}'='{an_response.segmented_data.__dict__}'")
+
+# This will send analyzed output to configure sink ie Slack, Zendesk etc
+sink_response_list = sink.send_data(analyzer_response_list, sink_config)
+
+# Uncomment if you want to log sink response
+# for sink_response in sink_response_list:
+# if sink_response is not None:
+# logger.info(f"sink_response='{sink_response}'")
+```
+
+
+
+Step 5: Execute workflow
+Copy the code snippets from Steps 1 to 4 into a python file, for example example.py and execute the following command -
+
+```shell
+python example.py
+```
+
+
+
+## Demo
+
+We have a minimal [streamlit](https://streamlit.io/) based UI that you can use to test Obsei.
+
+![Screenshot](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/obsei-ui-demo.png)
+
+### Watch UI demo video
+
+[![Introductory and demo video](https://img.youtube.com/vi/GTF-Hy96gvY/2.jpg)](https://www.youtube.com/watch?v=GTF-Hy96gvY)
+
+Check demo at [![](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/obsei/obsei-demo)
+
+(**Note**: Sometimes the Streamlit demo might not work due to rate limiting, use the docker image (locally) in such cases.)
+
+To test locally, just run
+
+```
+docker run -d --name obsei-ui -p 8501:8501 obsei/obsei-ui-demo
+
+# You can find the UI at http://localhost:8501
+```
+
+**To run Obsei workflow easily using GitHub Actions (no sign ups and cloud hosting required), refer to this [repo](https://github.com/obsei/demo-workflow-action)**.
+
+## Companies/Projects using Obsei
+
+Here are some companies/projects (alphabetical order) using Obsei. To add your company/project to the list, please raise a PR or contact us via [email](mailto:contact@obsei.com).
+
+- [Oraika](https://www.oraika.com): Contextually understand customer feedback
+- [1Page](https://www.get1page.com/): Giving a better context in meetings and calls
+- [Spacepulse](http://spacepulse.in/): The operating system for spaces
+- [Superblog](https://superblog.ai/): A blazing fast alternative to WordPress and Medium
+- [Zolve](https://zolve.com/): Creating a financial world beyond borders
+- [Utilize](https://www.utilize.app/): No-code app builder for businesses with a deskless workforce
+
+## Articles
+
+
Observe app reviews from Google play store, PreProcess text via various text cleaning functions, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive
+
+
+
PlayStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive
Observe app reviews from Apple app store, PreProcess text via various text cleaning function, Analyze them by performing text classification, Inform them to Pandas DataFrame and store resultant CSV to Google Drive
+
+
+
AppStore Reviews → PreProcessing → Classification → Pandas DataFrame → CSV in Google Drive
Observe news article from Google news, PreProcess text via various text cleaning function, Analyze them via performing text classification while splitting text in small chunks and later computing final inference using given formula
+
+
+
Google News → Text Cleaner → Text Splitter → Classification → Inference Aggregator
+
+💡Tips: Handle large text classification via Obsei
+
+![](https://raw.githubusercontent.com/obsei/obsei-resources/master/gifs/Long_Text_Classification.gif)
+
+
+
+## Documentation
+
+For detailed installation instructions, usages and examples, refer to our [documentation](https://obsei.github.io/obsei/).
+
+## Support and Release Matrix
+
+
+
+
+
+
Linux
+
Mac
+
Windows
+
Remark
+
+
+
+
+
Tests
+
✅
+
✅
+
✅
+
Low Coverage as difficult to test 3rd party libs
+
+
+
PIP
+
✅
+
✅
+
✅
+
Fully Supported
+
+
+
Conda
+
❌
+
❌
+
❌
+
Not Supported
+
+
+
+
+## Discussion forum
+
+Discussion about _Obsei_ can be done at [community forum](https://github.com/obsei/obsei/discussions)
+
+## Changelogs
+
+Refer [releases](https://github.com/obsei/obsei/releases) for changelogs
+
+## Security Issue
+
+For any security issue please contact us via [email](mailto:contact@oraika.com)
+
+## Stargazers over time
+
+[![Stargazers over time](https://starchart.cc/obsei/obsei.svg)](https://starchart.cc/obsei/obsei)
+
+## Maintainers
+
+This project is being maintained by [Oraika Technologies](https://www.oraika.com). [Lalit Pagaria](https://github.com/lalitpagaria) and [Girish Patel](https://github.com/GirishPatel) are maintainers of this project.
+
+## License
+
+- Copyright holder: [Oraika Technologies](https://www.oraika.com)
+- Overall Apache 2.0 and you can read [License](https://github.com/obsei/obsei/blob/master/LICENSE) file.
+- Multiple other secondary permissive or weak copyleft licenses (LGPL, MIT, BSD etc.) for third-party components refer [Attribution](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
+- To make the project more commercial friendly, we avoid third party components which have strong copyleft licenses (GPL, AGPL etc.) in the project.
+
+## Attribution
+
+This could not have been possible without these [open source softwares](https://github.com/obsei/obsei/blob/master/ATTRIBUTION.md).
+
+## Contribution
+
+First off, thank you for even considering contributing to this package, every contribution big or small is greatly appreciated.
+Please refer our [Contribution Guideline](https://github.com/obsei/obsei/blob/master/CONTRIBUTING.md) and [Code of Conduct](https://github.com/obsei/obsei/blob/master/CODE_OF_CONDUCT.md).
+
+Thanks so much to all our contributors
+
+
+
+
diff --git a/obsei_module/obsei-master/SECURITY.md b/obsei_module/obsei-master/SECURITY.md
new file mode 100644
index 0000000000000000000000000000000000000000..40ce33e3996ab24222f9c236fe167128c507ed6e
--- /dev/null
+++ b/obsei_module/obsei-master/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+For any security issue please report it via [email](mailto:contact@oraika.com).
diff --git a/obsei_module/obsei-master/_config.yml b/obsei_module/obsei-master/_config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b55420d431480b1c3f2d4515c45b47c2e0625df
--- /dev/null
+++ b/obsei_module/obsei-master/_config.yml
@@ -0,0 +1,9 @@
+theme: jekyll-theme-primer
+markdown: CommonMarkGhPages
+commonmark:
+ options: ["UNSAFE", "SMART", "FOOTNOTES"]
+ extensions: ["strikethrough", "autolink", "table", "tagfilter"]
+title: "Obsei: An open-source low-code AI powered automation tool"
+description: "Obsei is an open-source low-code AI powered automation tool"
+
+google_analytics: G-0E2FTKBK4T
diff --git a/obsei_module/obsei-master/_includes/head-custom-google-analytics.html b/obsei_module/obsei-master/_includes/head-custom-google-analytics.html
new file mode 100644
index 0000000000000000000000000000000000000000..360ca261d4caea0b2597b4d53b2e95605b341b86
--- /dev/null
+++ b/obsei_module/obsei-master/_includes/head-custom-google-analytics.html
@@ -0,0 +1,9 @@
+
+
+
diff --git a/obsei_module/obsei-master/binder/requirements.txt b/obsei_module/obsei-master/binder/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c436e37c0702f46f8deb36b9deed2d3fb8491296
--- /dev/null
+++ b/obsei_module/obsei-master/binder/requirements.txt
@@ -0,0 +1,2 @@
+git+https://github.com/obsei/obsei@master#egg=obsei[all]
+trafilatura
diff --git a/obsei_module/obsei-master/example/app_store_scrapper_example.py b/obsei_module/obsei-master/example/app_store_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcbf9bb1a4a24527319dda4b130a41f7cb12f549
--- /dev/null
+++ b/obsei_module/obsei-master/example/app_store_scrapper_example.py
@@ -0,0 +1,41 @@
+import logging
+import sys
+from datetime import datetime, timedelta
+
+import pytz
+
+from obsei.analyzer.classification_analyzer import ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer
+from obsei.misc.utils import DATETIME_STRING_PATTERN
+from obsei.source.appstore_scrapper import (
+ AppStoreScrapperConfig,
+ AppStoreScrapperSource,
+)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+since_time = datetime.utcnow().astimezone(pytz.utc) + timedelta(days=-5)
+source_config = AppStoreScrapperConfig(
+ app_url='https://apps.apple.com/us/app/gmail-email-by-google/id422689480',
+ lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
+ max_count=10,
+)
+
+source = AppStoreScrapperSource()
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+ logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=ClassificationAnalyzerConfig(
+ labels=["interface", "slow", "battery"],
+ ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+ logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/obsei-master/example/daily_get_example.py b/obsei_module/obsei-master/example/daily_get_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b8209b21f1113035aa7f45a3b419e77fbac73e2
--- /dev/null
+++ b/obsei_module/obsei-master/example/daily_get_example.py
@@ -0,0 +1,77 @@
+import logging
+import os
+import sys
+from pathlib import Path
+
+from obsei.sink.dailyget_sink import DailyGetSink, DailyGetSinkConfig
+from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
+from obsei.analyzer.classification_analyzer import (
+ ClassificationAnalyzerConfig,
+ ZeroShotClassificationAnalyzer,
+)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+sink_config = DailyGetSinkConfig(
+ url=os.environ["DAILYGET_URL"],
+ partner_id=os.environ["DAILYGET_PARTNER_ID"],
+ consumer_phone_number=os.environ["DAILYGET_CONSUMER_NUMBER"],
+ source_information="Twitter " + os.environ["DAILYGET_QUERY"],
+ base_payload={
+ "partnerId": os.environ["DAILYGET_PARTNER_ID"],
+ "consumerPhoneNumber": os.environ["DAILYGET_CONSUMER_NUMBER"],
+ },
+)
+
+dir_path = Path(__file__).resolve().parent.parent
+source_config = TwitterSourceConfig(
+ keywords=[os.environ["DAILYGET_QUERY"]],
+ lookup_period=os.environ["DAILYGET_LOOKUP_PERIOD"],
+ tweet_fields=[
+ "author_id",
+ "conversation_id",
+ "created_at",
+ "id",
+ "public_metrics",
+ "text",
+ ],
+ user_fields=["id", "name", "public_metrics", "username", "verified"],
+ expansions=["author_id"],
+ place_fields=None,
+ max_tweets=10,
+)
+
+source = TwitterSource()
+sink = DailyGetSink()
+text_analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
+ # model_name_or_path="joeddav/xlm-roberta-large-xnli",
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+ logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=ClassificationAnalyzerConfig(
+ labels=[
+ "service",
+ "delay",
+ "tracking",
+ "no response",
+ "missing items",
+ "delivery",
+ "mask",
+ ],
+ ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+ logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+# HTTP Sink
+sink_response_list = sink.send_data(analyzer_response_list, sink_config)
+for sink_response in sink_response_list:
+ if sink_response is not None:
+ logger.info(f"sink_response='{sink_response.__dict__}'")
diff --git a/obsei_module/obsei-master/example/elasticsearch_example.py b/obsei_module/obsei-master/example/elasticsearch_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..81bc7594ab85cf66d6259d7b41c15ebf12c473fc
--- /dev/null
+++ b/obsei_module/obsei-master/example/elasticsearch_example.py
@@ -0,0 +1,69 @@
+import logging
+import sys
+from pathlib import Path
+
+from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
+from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# NOTE(review): dir_path is unused below — confirm before removing.
+dir_path = Path(__file__).resolve().parent.parent
+source_config = TwitterSourceConfig(
+    # NOTE(review): sibling examples pass a list here (e.g. keywords=["..."]);
+    # confirm a bare string is accepted by TwitterSourceConfig.
+    keywords="@Handle",
+    lookup_period="1h",  # 1 Hour
+    tweet_fields=[
+        "author_id",
+        "conversation_id",
+        "created_at",
+        "id",
+        "public_metrics",
+        "text",
+    ],
+    user_fields=["id", "name", "public_metrics", "username", "verified"],
+    expansions=["author_id"],
+    place_fields=None,
+    max_tweets=10,
+)
+
+source = TwitterSource()
+# Zero-shot classifier; labels are supplied per-request below.
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
+)
+
+# Start Elasticsearch server locally
+# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2`
+sink_config = ElasticSearchSinkConfig(
+    host="localhost",
+    port=9200,
+    index_name="test",
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+    source_response_list=source_response_list,
+    analyzer_config=ClassificationAnalyzerConfig(
+        labels=[
+            "service",
+            "delay",
+            "tracking",
+            "no response",
+            "missing items",
+            "delivery",
+            "mask",
+        ],
+    ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+# Index the classified payloads into the local Elasticsearch instance.
+sink = ElasticSearchSink()
+sink_response = sink.send_data(analyzer_response_list, sink_config)
+logger.info(f"sink_response='{sink_response}'")
diff --git a/obsei_module/obsei-master/example/email_source_example.py b/obsei_module/obsei-master/example/email_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..414819c8c56a5de328d7c7dbe694a5d9d5f4f2ef
--- /dev/null
+++ b/obsei_module/obsei-master/example/email_source_example.py
@@ -0,0 +1,36 @@
+import logging
+import os
+import sys
+from datetime import datetime, timedelta
+
+import pytz
+
+from obsei.misc.utils import DATETIME_STRING_PATTERN
+from obsei.source.email_source import EmailConfig, EmailCredInfo, EmailSource
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+since_time = datetime.utcnow().astimezone(pytz.utc) + timedelta(hours=-10)
+
+# List of IMAP servers for most commonly used email providers
+# https://www.systoolsgroup.com/imap/
+# Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
+# https://myaccount.google.com/lesssecureapps?pli=1
+# Also enable IMAP access -
+# https://mail.google.com/mail/u/0/#settings/fwdandpop
+source_config = EmailConfig(
+ imap_server="imap.gmail.com",
+ cred_info=EmailCredInfo(
+ # It will fetch username and password from environment variable
+ username=os.environ.get("email_username"),
+ password=os.environ.get("email_password"),
+ ),
+ lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
+)
+
+source = EmailSource()
+source_response_list = source.lookup(source_config)
+
+for source_response in source_response_list:
+ logger.info(source_response.__dict__)
diff --git a/obsei_module/obsei-master/example/facebook_example.py b/obsei_module/obsei-master/example/facebook_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..207e1eb005288648bc2c67f15150496e3fd66ab9
--- /dev/null
+++ b/obsei_module/obsei-master/example/facebook_example.py
@@ -0,0 +1,19 @@
+import logging
+import sys
+
+from obsei.source.facebook_source import FacebookSource, FacebookSourceConfig
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Fetch the last 2 months of posts from the given Facebook page id.
+source_config = FacebookSourceConfig(page_id="110844591144719", lookup_period="2M")
+source = FacebookSource()
+source_response_list = source.lookup(source_config)
+
+logger.info("DETAILS:")
+for source_response in source_response_list:
+    logger.info(source_response)
+
+logger.info("TEXT:")
+for source_response in source_response_list:
+    logger.info(source_response.processed_text)
diff --git a/obsei_module/obsei-master/example/google_news_example.py b/obsei_module/obsei-master/example/google_news_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..40277f61d7f7b37f62154dd84ed7bb2003a35e9a
--- /dev/null
+++ b/obsei_module/obsei-master/example/google_news_example.py
@@ -0,0 +1,58 @@
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+from obsei.source.google_news_source import GoogleNewsConfig, GoogleNewsSource
+
+# Only fetch title and highlight
+source_config_without_full_text = GoogleNewsConfig(
+    query="ai",
+    max_results=150,
+    after_date='2023-12-01',
+    before_date='2023-12-31',
+)
+
+# Fetch full news article
+source_config_with_full_text = GoogleNewsConfig(
+    query="ai",
+    max_results=5,
+    fetch_article=True,
+    lookup_period="1d",
+    # proxy="http://127.0.0.1:8080"
+)
+
+source = GoogleNewsSource()
+
+# Single label set reused for both runs below.
+analyzer_config = ClassificationAnalyzerConfig(
+    labels=["buy", "sell", "going up", "going down"],
+)
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+news_articles_without_full_text = source.lookup(source_config_without_full_text)
+
+news_articles_with_full_text = source.lookup(source_config_with_full_text)
+
+
+analyzer_responses_without_full_text = text_analyzer.analyze_input(
+    source_response_list=news_articles_without_full_text,
+    analyzer_config=analyzer_config,
+)
+
+analyzer_responses_with_full_text = text_analyzer.analyze_input(
+    source_response_list=news_articles_with_full_text, analyzer_config=analyzer_config
+)
+
+# Dump raw articles and classification results for both variants.
+for article in news_articles_without_full_text:
+    print(article.__dict__)
+
+for response in analyzer_responses_without_full_text:
+    print(response.__dict__)
+
+for article in news_articles_with_full_text:
+    print(article.__dict__)
+
+for response in analyzer_responses_with_full_text:
+    print(response.__dict__)
diff --git a/obsei_module/obsei-master/example/jira_example.py b/obsei_module/obsei-master/example/jira_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..00b59600731b689da9ab57bffd71285e729754e3
--- /dev/null
+++ b/obsei_module/obsei-master/example/jira_example.py
@@ -0,0 +1,77 @@
+# Jira Sink
+import logging
+import os
+import sys
+from pathlib import Path
+
+from pydantic import SecretStr
+
+from obsei.sink.jira_sink import JiraSink, JiraSinkConfig
+from obsei.source.twitter_source import (
+    TwitterCredentials,
+    TwitterSource,
+    TwitterSourceConfig,
+)
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# NOTE(review): dir_path is unused below — confirm before removing.
+dir_path = Path(__file__).resolve().parent.parent
+# Search tweets containing "facing issue" from the last hour; credentials
+# are read from environment variables and wrapped in SecretStr.
+source_config = TwitterSourceConfig(
+    keywords=["facing issue"],
+    lookup_period="1h",
+    tweet_fields=[
+        "author_id",
+        "conversation_id",
+        "created_at",
+        "id",
+        "public_metrics",
+        "text",
+    ],
+    user_fields=["id", "name", "public_metrics", "username", "verified"],
+    expansions=["author_id"],
+    place_fields=None,
+    max_tweets=10,
+    cred_info=TwitterCredentials(
+        consumer_key=SecretStr(os.environ["twitter_consumer_key"]),
+        consumer_secret=SecretStr(os.environ["twitter_consumer_secret"]),
+    ),
+)
+
+source = TwitterSource()
+
+# To start jira server locally `atlas-run-standalone --product jira`
+jira_sink_config = JiraSinkConfig(
+    url="http://localhost:2990/jira",
+    username=SecretStr("admin"),
+    password=SecretStr("admin"),
+    issue_type={"name": "Task"},
+    project={"key": "CUS"},
+)
+jira_sink = JiraSink()
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers"
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+    source_response_list=source_response_list,
+    analyzer_config=ClassificationAnalyzerConfig(
+        labels=["service", "delay", "performance"],
+    ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+# Create one Jira issue per classified payload.
+sink_response_list = jira_sink.send_data(analyzer_response_list, jira_sink_config)
+for sink_response in sink_response_list:
+    if sink_response is not None:
+        logger.info(f"sink_response='{sink_response}'")
diff --git a/obsei_module/obsei-master/example/maps_review_scrapper_example.py b/obsei_module/obsei-master/example/maps_review_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0633a025bb9fc15c933bd5a5a4058a0012e6392
--- /dev/null
+++ b/obsei_module/obsei-master/example/maps_review_scrapper_example.py
@@ -0,0 +1,22 @@
+import logging
+import sys
+
+from obsei.source.google_maps_reviews import (OSGoogleMapsReviewsConfig,
+                                              OSGoogleMapsReviewsSource)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Scrape Google Maps reviews via the Outscraper API for the given place URL.
+source_config = OSGoogleMapsReviewsConfig(
+    api_key="",  # Get API key from https://outscraper.com/
+    queries=[
+        "https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"
+    ],
+    number_of_reviews=3,
+)
+
+source = OSGoogleMapsReviewsSource()
+
+source_response_list = source.lookup(source_config)
+for source_response in source_response_list:
+    logger.info(source_response.__dict__)
diff --git a/obsei_module/obsei-master/example/pandas_sink_example.py b/obsei_module/obsei-master/example/pandas_sink_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2d817ad0c369e4b81eda9f754f149c50c2875c8
--- /dev/null
+++ b/obsei_module/obsei-master/example/pandas_sink_example.py
@@ -0,0 +1,49 @@
+import logging
+import sys
+
+from pandas import DataFrame
+
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+from obsei.sink.pandas_sink import PandasSink, PandasSinkConfig
+from obsei.source.playstore_scrapper import (
+    PlayStoreScrapperConfig,
+    PlayStoreScrapperSource,
+)
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Scrape up to 3 US Play Store reviews for the given package.
+source_config = PlayStoreScrapperConfig(
+    countries=["us"], package_name="com.apcoaconnect", max_count=3
+)
+
+source = PlayStoreScrapperSource()
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+# initialize pandas sink config
+sink_config = PandasSinkConfig(dataframe=DataFrame())
+
+# initialize pandas sink
+sink = PandasSink()
+
+source_response_list = source.lookup(source_config)
+
+analyzer_response_list = text_analyzer.analyze_input(
+    source_response_list=source_response_list,
+    analyzer_config=ClassificationAnalyzerConfig(
+        labels=["no parking", "registration issue", "app issue", "payment issue"],
+    ),
+)
+
+# send_data returns the populated DataFrame rather than per-item responses.
+dataframe = sink.send_data(
+    analyzer_responses=analyzer_response_list, config=sink_config
+)
+
+print(dataframe.to_csv())
diff --git a/obsei_module/obsei-master/example/pandas_source_example.py b/obsei_module/obsei-master/example/pandas_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a82af3ac3cb46fd4b08de92b00b04754249511c
--- /dev/null
+++ b/obsei_module/obsei-master/example/pandas_source_example.py
@@ -0,0 +1,27 @@
+import pandas as pd
+
+from obsei.source.pandas_source import (
+    PandasSourceConfig,
+    PandasSource,
+)
+import logging
+import sys
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Initialize your Pandas DataFrame from your sources like csv, excel, sql etc
+# In following example we are reading csv which have two columns title and text
+csv_file = "https://raw.githubusercontent.com/deepset-ai/haystack/master/tutorials/small_generator_dataset.csv"
+dataframe = pd.read_csv(csv_file)
+
+# "text" columns become the payload text; "title" is carried along as metadata.
+source_config = PandasSourceConfig(
+    dataframe=dataframe,
+    include_columns=["title"],
+    text_columns=["text"],
+)
+source = PandasSource()
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
diff --git a/obsei_module/obsei-master/example/pii_analyzer_example.py b/obsei_module/obsei-master/example/pii_analyzer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..36ec4ff72c3f5221ccbc7c35d74897619ce69514
--- /dev/null
+++ b/obsei_module/obsei-master/example/pii_analyzer_example.py
@@ -0,0 +1,33 @@
+import logging
+import sys
+
+from obsei.payload import TextPayload
+from obsei.analyzer.pii_analyzer import (
+ PresidioEngineConfig,
+ PresidioModelConfig,
+ PresidioPIIAnalyzer,
+ PresidioPIIAnalyzerConfig,
+)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+analyzer_config = PresidioPIIAnalyzerConfig(
+ analyze_only=False, return_decision_process=True
+)
+analyzer = PresidioPIIAnalyzer(
+ engine_config=PresidioEngineConfig(
+ nlp_engine_name="spacy",
+ models=[PresidioModelConfig(model_name="en_core_web_lg", lang_code="en")],
+ )
+)
+
+text_to_anonymize = "His name is Mr. Jones and his phone number is 212-555-5555"
+
+analyzer_results = analyzer.analyze_input(
+ source_response_list=[TextPayload(processed_text=text_to_anonymize)],
+ analyzer_config=analyzer_config,
+)
+
+for analyzer_result in analyzer_results:
+ logging.info(analyzer_result.to_dict())
diff --git a/obsei_module/obsei-master/example/play_store_reviews_example.py b/obsei_module/obsei-master/example/play_store_reviews_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d37669a7a4441ce69be05152c7dad7aad5edd538
--- /dev/null
+++ b/obsei_module/obsei-master/example/play_store_reviews_example.py
@@ -0,0 +1,4 @@
+# TBD
+
+# Need proper service account file to test the changes :(
+print("TBD")
diff --git a/obsei_module/obsei-master/example/playstore_scrapper_example.py b/obsei_module/obsei-master/example/playstore_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..00b1a5406f9c9785bd08262559edca0309832617
--- /dev/null
+++ b/obsei_module/obsei-master/example/playstore_scrapper_example.py
@@ -0,0 +1,40 @@
+import logging
+import sys
+
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+
+from obsei.source.playstore_scrapper import (
+    PlayStoreScrapperConfig,
+    PlayStoreScrapperSource,
+)
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Scrape up to 3 reviews directly from the given Play Store listing URL.
+source_config = PlayStoreScrapperConfig(
+    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
+    max_count=3
+)
+
+source = PlayStoreScrapperSource()
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+    source_response_list=source_response_list,
+    analyzer_config=ClassificationAnalyzerConfig(
+        labels=["interface", "slow", "battery"],
+    ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/obsei-master/example/playstore_scrapper_translator_example.py b/obsei_module/obsei-master/example/playstore_scrapper_translator_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..e89e09be4a17334368fcfe44039fa294728d020e
--- /dev/null
+++ b/obsei_module/obsei-master/example/playstore_scrapper_translator_example.py
@@ -0,0 +1,86 @@
+import json
+import logging
+import sys
+from datetime import datetime, timedelta
+
+import pytz
+
+from obsei.payload import TextPayload
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig,
+    ZeroShotClassificationAnalyzer,
+)
+from obsei.analyzer.translation_analyzer import TranslationAnalyzer
+from obsei.misc.utils import DATETIME_STRING_PATTERN
+from obsei.source.playstore_scrapper import (
+    PlayStoreScrapperConfig,
+    PlayStoreScrapperSource,
+)
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+source = PlayStoreScrapperSource()
+
+
+def source_fetch():
+    # Scrape up to 5 reviews of a Hindi keyboard app from the last day.
+    # NOTE(review): utcnow().astimezone(pytz.utc) treats the naive timestamp
+    # as local time — wrong instant on non-UTC machines; confirm and fix.
+    since_time = datetime.utcnow().astimezone(pytz.utc) + timedelta(days=-1)
+    source_config = PlayStoreScrapperConfig(
+        countries=["us"],
+        package_name="com.color.apps.hindikeyboard.hindi.language",
+        lookup_period=since_time.strftime(
+            DATETIME_STRING_PATTERN
+        ),  # todo should be optional
+        max_count=5,
+    )
+    return source.lookup(source_config)
+
+
+def translate_text(text_list):
+    # Translate Hindi review text to English, returning fresh TextPayloads
+    # whose processed_text holds the translation.
+    translate_analyzer = TranslationAnalyzer(
+        model_name_or_path="Helsinki-NLP/opus-mt-hi-en", device="auto"
+    )
+    source_responses = [
+        TextPayload(processed_text=text.processed_text, source_name="sample")
+        for text in text_list
+    ]
+    analyzer_responses = translate_analyzer.analyze_input(
+        source_response_list=source_responses
+    )
+    return [
+        TextPayload(
+            processed_text=response.segmented_data["translated_text"],
+            source_name="translator",
+        )
+        for response in analyzer_responses
+    ]
+
+
+def classify_text(text_list):
+    # Zero-shot classify the (translated) payloads against fixed labels.
+    text_analyzer = ZeroShotClassificationAnalyzer(
+        model_name_or_path="joeddav/bart-large-mnli-yahoo-answers", device="cpu"
+    )
+
+    return text_analyzer.analyze_input(
+        source_response_list=text_list,
+        analyzer_config=ClassificationAnalyzerConfig(
+            labels=["no parking", "registration issue", "app issue", "payment issue"],
+        ),
+    )
+
+
+def print_list(text_name, text_list):
+    # Pretty-print each payload as indented JSON under a numbered tag.
+    for idx, text in enumerate(text_list):
+        json_response = json.dumps(text.__dict__, indent=4, sort_keys=True, default=str)
+        logger.info(f"\n{text_name}#'{idx}'='{json_response}'")
+
+
+logger.info("Started...")
+
+# Pipeline: scrape -> translate (hi->en) -> classify.
+source_responses_list = source_fetch()
+translated_text_list = translate_text(source_responses_list)
+analyzer_response_list = classify_text(translated_text_list)
+
+print_list("source_response", source_responses_list)
+print_list("translator_response", translated_text_list)
+print_list("classifier_response", analyzer_response_list)
diff --git a/obsei_module/obsei-master/example/reddit_example.py b/obsei_module/obsei-master/example/reddit_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdf3a8d60c0058e8cdde32914d1b984d7cbc848f
--- /dev/null
+++ b/obsei_module/obsei-master/example/reddit_example.py
@@ -0,0 +1,50 @@
+import logging
+import sys
+import time
+from datetime import datetime, timedelta
+
+import pytz
+
+from obsei.misc.utils import DATETIME_STRING_PATTERN
+from obsei.source.reddit_source import RedditConfig, RedditSource
+from obsei.workflow.store import WorkflowStore
+from obsei.workflow.workflow import Workflow, WorkflowConfig
+
+
+def print_state(id: str):
+ logger.info(f"Source State: {source.store.get_source_state(id)}")
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+since_time = datetime.utcnow().astimezone(pytz.utc) + timedelta(hours=-2)
+# Credentials will be fetched from env variable named reddit_client_id and reddit_client_secret
+source_config = RedditConfig(
+ subreddits=["wallstreetbets"],
+ lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
+)
+
+source = RedditSource(store=WorkflowStore())
+
+workflow = Workflow(
+ config=WorkflowConfig(
+ source_config=source_config,
+ ),
+)
+source.store.add_workflow(workflow)
+
+
+for i in range(1, 4):
+ print_state(workflow.id)
+ source_response_list = source.lookup(source_config, id=workflow.id)
+
+ if source_response_list is None or len(source_response_list) == 0:
+ break
+
+ for source_response in source_response_list:
+ logger.info(source_response.__dict__)
+
+ time.sleep(10)
+
+print_state(workflow.id)
diff --git a/obsei_module/obsei-master/example/reddit_scrapper_example.py b/obsei_module/obsei-master/example/reddit_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..f306024440302f61ebf8f95f29adef98df0f8aaf
--- /dev/null
+++ b/obsei_module/obsei-master/example/reddit_scrapper_example.py
@@ -0,0 +1,30 @@
+import logging
+import sys
+from datetime import datetime, timedelta
+
+import pytz
+
+from obsei.misc.utils import DATETIME_STRING_PATTERN
+from obsei.source.reddit_scrapper import RedditScrapperConfig, RedditScrapperSource
+
+
+def print_state(id: str):
+ logger.info(f"Source State: {source.store.get_source_state(id)}")
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+since_time = datetime.utcnow().astimezone(pytz.utc) + timedelta(days=-1)
+
+source_config = RedditScrapperConfig(
+ url="https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new",
+ user_agent="testscript by u/FitStatistician7378",
+ lookup_period=since_time.strftime(DATETIME_STRING_PATTERN),
+)
+
+source = RedditScrapperSource()
+
+source_response_list = source.lookup(source_config)
+for source_response in source_response_list:
+ logger.info(source_response.__dict__)
diff --git a/obsei_module/obsei-master/example/sdk.yaml b/obsei_module/obsei-master/example/sdk.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89f5d55d3e42af1fc5fe1be71f2d92930342dde4
--- /dev/null
+++ b/obsei_module/obsei-master/example/sdk.yaml
@@ -0,0 +1,97 @@
+# Sample Obsei SDK configuration, loaded by example/with_sdk_config_file.py via
+# ObseiConfiguration.initialize_instance(<top-level key>). Each `_target_`
+# names the fully-qualified class to instantiate with the sibling keys as
+# constructor arguments.
+twitter_source:
+  _target_: obsei.source.twitter_source.TwitterSourceConfig
+  keywords:
+    - "@sample"
+  lookup_period: "1d"
+  tweet_fields:
+    - "author_id"
+    - "conversation_id"
+    - "created_at"
+    - "id"
+    - "public_metrics"
+    - "text"
+  user_fields:
+    - "id"
+    - "name"
+    - "public_metrics"
+    - "username"
+    - "verified"
+  expansions:
+    - "author_id"
+  place_fields: []
+  max_tweets: 10
+  credential:
+    _target_: obsei.source.twitter_source.TwitterCredentials
+    bearer_token: "bearer_token"
+
+play_store_source:
+  _target_: obsei.source.playstore_reviews.PlayStoreConfig
+  package_name: "com.company.package"
+  max_results: 10
+  num_retries: 2
+  cred_info:
+    _target_: obsei.source.playstore_reviews.GoogleCredInfo
+    service_cred_file: "foo/credential.json"
+    developer_key: "test_key"
+
+daily_get_sink:
+  _target_: obsei.sink.dailyget_sink.DailyGetSinkConfig
+  url: "http://localhost:8080/sample"
+  partner_id: "123456"
+  consumer_phone_number: "1234567890"
+  source_information: "Twitter @sample"
+  # NOTE(review): these values are literal strings, not references to the
+  # keys above — confirm whether interpolation was intended.
+  base_payload:
+    partnerId: daily_get_sink.partner_id
+    consumerPhoneNumber: daily_get_sink.consumer_phone_number
+
+http_sink:
+  _target_: obsei.sink.http_sink.HttpSinkConfig
+  url: "http://localhost:8080/sample"
+
+elasticsearch_sink:
+  _target_: obsei.sink.elasticsearch_sink.ElasticSearchSinkConfig
+  host: "localhost"
+  port: 9200
+  index_name: "test"
+
+jira_sink:
+  _target_: obsei.sink.jira_sink.JiraSinkConfig
+  url: "http://localhost:2990/jira"
+  username: "user"
+  password: "pass"
+  issue_type:
+    name: "Task"
+  project:
+    key: "CUS"
+
+analyzer_config:
+  _target_: obsei.analyzer.classification_analyzer.ClassificationAnalyzerConfig
+  labels:
+    - "service"
+    - "delay"
+    - "tracking"
+    - "no response"
+  add_positive_negative_labels: false
+
+analyzer:
+  _target_: obsei.analyzer.classification_analyzer.ZeroShotClassificationAnalyzer
+  model_name_or_path: "typeform/mobilebert-uncased-mnli"
+  device: "auto"
+
+slack_sink:
+  _target_: obsei.sink.SlackSink
+
+slack_sink_config:
+  _target_: obsei.sink.SlackSinkConfig
+  slack_token: 'Enter token'
+  channel_id: 'slack channel id'
+  # Jinja template that renders the payload dict as a flattened key: value
+  # list inside a Slack code fence.
+  jinja_template: |
+    ```
+    {%- for key, value in payload.items() recursive%}
+    {%- if value is mapping -%}
+    {{loop(value.items())}}
+    {%- else %}
+    {{key}}: {{value}}
+    {%- endif %}
+    {%- endfor%}
+    ```
diff --git a/obsei_module/obsei-master/example/slack_example.py b/obsei_module/obsei-master/example/slack_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d7d8a6d1730a7ef37217940eaafd5d96b40f2b0
--- /dev/null
+++ b/obsei_module/obsei-master/example/slack_example.py
@@ -0,0 +1,66 @@
+import logging
+import os
+import sys
+
+from obsei.analyzer.dummy_analyzer import DummyAnalyzer, DummyAnalyzerConfig
+from obsei.processor import Processor
+from obsei.sink.slack_sink import SlackSink, SlackSinkConfig
+from obsei.source.playstore_scrapper import (PlayStoreScrapperConfig,
+                                             PlayStoreScrapperSource)
+from obsei.workflow.store import WorkflowStore
+from obsei.workflow.workflow import Workflow, WorkflowConfig
+
+
+def print_state(identifier: str):
+    # Log the persisted source state for the given workflow id.
+    logger.info(f"Source State: {source.store.get_source_state(identifier)}")
+
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+
+# Shared store so source and sink track workflow state between runs.
+workflow_store = WorkflowStore()
+
+source_config = PlayStoreScrapperConfig(
+    app_url='https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US',
+    max_count=3
+)
+
+source = PlayStoreScrapperSource(store=workflow_store)
+
+# Jinja template renders the payload dict as flattened key: value lines
+# inside a Slack code fence.
+sink_config = SlackSinkConfig(
+    slack_token=os.environ["SLACK_TOKEN"],
+    channel_id="C01TUPZ23NZ",
+    jinja_template="""
+```
+  {%- for key, value in payload.items() recursive%}
+   {%- if value is mapping -%}
+{{loop(value.items())}}
+   {%- else %}
+{{key}}: {{value}}
+   {%- endif %}
+  {%- endfor%}
+```
+    """
+)
+sink = SlackSink(store=workflow_store)
+
+# DummyAnalyzer passes payloads through unchanged — this example demos the
+# source -> sink plumbing, not analysis.
+analyzer_config = DummyAnalyzerConfig()
+analyzer = DummyAnalyzer()
+
+workflow = Workflow(
+    config=WorkflowConfig(
+        source_config=source_config,
+        sink_config=sink_config,
+        analyzer_config=analyzer_config,
+    ),
+)
+workflow_store.add_workflow(workflow)
+
+# Processor runs the full fetch -> analyze -> send cycle for the workflow.
+processor = Processor(
+    analyzer=analyzer, sink=sink, source=source, analyzer_config=analyzer_config
+)
+
+processor.process(workflow=workflow)
+
+print_state(workflow.id)
diff --git a/obsei_module/obsei-master/example/twitter_source_example.py b/obsei_module/obsei-master/example/twitter_source_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc92b681dd4b2ea248162954c0ee2d2306b110f0
--- /dev/null
+++ b/obsei_module/obsei-master/example/twitter_source_example.py
@@ -0,0 +1,98 @@
+import logging
+import sys
+
+from obsei.analyzer.classification_analyzer import ZeroShotClassificationAnalyzer, ClassificationAnalyzerConfig
+from obsei.sink.slack_sink import SlackSinkConfig, SlackSink
+from obsei.source.twitter_source import TwitterSourceConfig, TwitterSource, TwitterCredentials
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+twitter_cred_info = None
+
+# Enter your twitter credentials
+# Get it from https://developer.twitter.com/en/apply-for-access
+# Currently it will fetch from environment variables: twitter_bearer_token, twitter_consumer_key, twitter_consumer_secret
+# Uncomment below lines if you like to pass credentials directly instead of env variables
+
+# twitter_cred_info = TwitterCredentials(
+# bearer_token='',
+# consumer_key="",
+# consumer_secret=""
+# )
+
+source_config = TwitterSourceConfig(
+ query="bitcoin",
+ lookup_period="1h",
+ tweet_fields=[
+ "author_id",
+ "conversation_id",
+ "created_at",
+ "id",
+ "public_metrics",
+ "text",
+ ],
+ user_fields=["id", "name", "public_metrics", "username", "verified"],
+ expansions=["author_id"],
+ place_fields=None,
+ max_tweets=10,
+ cred_info=twitter_cred_info or None
+)
+
+source = TwitterSource()
+
+
+sink_config = SlackSinkConfig(
+ # Uncomment below lines if you like to pass credentials directly instead of env variables
+ # slack_token="SLACK_TOKEN",
+ # channel_id="CHANNEL_ID",
+ jinja_template="""
+:bell: Hi there!, a new `<{{payload['meta']['tweet_url']}}|tweet>` of interest is found by *Obsei*
+>📝 Content:
+```{{payload['meta']['text']}}```
+>ℹ️Information:
+```
+User Name: {{payload['meta']['author_info']['name']}} ({{payload['meta']['author_info']['user_url']}})
+Tweet Metrics: Retweets={{payload['meta']['public_metrics']['retweet_count']}}, Likes={{payload['meta']['public_metrics']['like_count']}}
+Author Metrics: Verified={{payload['meta']['author_info']['verified']}}, Followers={{payload['meta']['author_info']['public_metrics']['followers_count']}}
+```
+>🧠 AI Engine Data:
+```
+ {%- for key, value in payload['segmented_data']['classifier_data'].items() recursive%}
+ {%- if value is mapping -%}
+{{loop(value.items())}}
+ {%- else %}
+{{key}}: {{value}}
+ {%- endif %}
+ {%- endfor%}
+```
+ """
+)
+sink = SlackSink()
+
+text_analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+analyzer_config = ClassificationAnalyzerConfig(
+ labels=["going up", "going down"],
+ add_positive_negative_labels=False,
+)
+
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+ logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+analyzer_response_list = text_analyzer.analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=analyzer_config,
+)
+
+for idx, an_response in enumerate(analyzer_response_list):
+ logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
+
+sink_response_list = sink.send_data(
+ analyzer_responses=analyzer_response_list, config=sink_config, id=id
+)
+for idx, sink_response in enumerate(sink_response_list):
+ logger.info(f"source_response#'{idx}'='{sink_response.__dict__}'")
diff --git a/obsei_module/obsei-master/example/web_crawler_example.py b/obsei_module/obsei-master/example/web_crawler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..51e5d68ba7ea902781110b1cdf327cba25630383
--- /dev/null
+++ b/obsei_module/obsei-master/example/web_crawler_example.py
@@ -0,0 +1,43 @@
+# Fetch full news article
+from obsei.source.website_crawler_source import (
+    TrafilaturaCrawlerConfig,
+    TrafilaturaCrawlerSource,
+)
+
+
+def print_list(response_list):
+    # Dump each crawled payload's attributes to stdout.
+    for response in response_list:
+        print(response.__dict__)
+
+
+# Single URL
+source_config = TrafilaturaCrawlerConfig(urls=["https://obsei.github.io/obsei/"])
+
+source = TrafilaturaCrawlerSource()
+
+source_response_list = source.lookup(source_config)
+print_list(source_response_list)
+
+
+# RSS feed (Note it will take lot of time)
+source_config = TrafilaturaCrawlerConfig(
+    urls=["https://news.google.com/rss/search?q=bitcoin&hl=en&gl=US&ceid=US:en"],
+    is_feed=True,
+)
+
+source = TrafilaturaCrawlerSource()
+
+source_response_list = source.lookup(source_config)
+print_list(source_response_list)
+
+
+# Full website (Note it will take lot of time)
+source_config = TrafilaturaCrawlerConfig(
+    urls=["https://haystack.deepset.ai/"],
+    is_sitemap=True,
+)
+
+source = TrafilaturaCrawlerSource()
+
+source_response_list = source.lookup(source_config)
+print_list(source_response_list)
diff --git a/obsei_module/obsei-master/example/with_sdk_config_file.py b/obsei_module/obsei-master/example/with_sdk_config_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..37da8373fa032c2ceecab54de181ed9e9190ae04
--- /dev/null
+++ b/obsei_module/obsei-master/example/with_sdk_config_file.py
@@ -0,0 +1,28 @@
+import logging
+import sys
+
+from obsei.configuration import ObseiConfiguration
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+obsei_configuration = ObseiConfiguration(
+ config_path="../example",
+ config_filename="sdk.yaml",
+)
+
+text_analyzer = obsei_configuration.initialize_instance("analyzer")
+analyzer_config = obsei_configuration.initialize_instance("analyzer_config")
+slack_source_config = obsei_configuration.initialize_instance("slack_sink_config")
+slack_sink = obsei_configuration.initialize_instance("slack_sink")
+
+play_store_source_config = obsei_configuration.initialize_instance("play_store_source")
+twitter_source_config = obsei_configuration.initialize_instance("twitter_source")
+http_sink_config = obsei_configuration.initialize_instance("http_sink")
+daily_get_sink_config = obsei_configuration.initialize_instance("daily_get_sink")
+# docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2
+elasticsearch_sink_config = obsei_configuration.initialize_instance(
+ "elasticsearch_sink"
+)
+# Start jira server locally `atlas-run-standalone --product jira`
+jira_sink_config = obsei_configuration.initialize_instance("jira_sink")
diff --git a/obsei_module/obsei-master/example/with_state_example.py b/obsei_module/obsei-master/example/with_state_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..40ac7a0bc68a781907fd720861f035f557c2b67f
--- /dev/null
+++ b/obsei_module/obsei-master/example/with_state_example.py
@@ -0,0 +1,60 @@
+import logging
+import sys
+import time
+
+from obsei.workflow.store import WorkflowStore
+from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig
+from obsei.workflow.workflow import Workflow, WorkflowConfig
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Create workflow store instance, by default it will use SQLite to store state data
+store = WorkflowStore()
+
+# Pass store reference to observer, so it can use it to store state data
+source = TwitterSource(store=store)
+
+
+def print_state(id: str):
+ logger.info(f"Source State: {source.store.get_source_state(id)}")
+
+
+source_config = TwitterSourceConfig(
+ keywords=["india"],
+ lookup_period="2m",
+ tweet_fields=[
+ "author_id",
+ "conversation_id",
+ "created_at",
+ "id",
+ "public_metrics",
+ "text",
+ ],
+ user_fields=["id", "name", "public_metrics", "username", "verified"],
+ expansions=["author_id"],
+ place_fields=None,
+ max_tweets=10,
+)
+
+# Create instance of workflow, adding observer config to it, it will autgenerate unique workflow id
+workflow = Workflow(
+ config=WorkflowConfig(
+ source_config=source_config,
+ ),
+)
+# Insert workflow config to DB store
+store.add_workflow(workflow)
+
+for i in range(1, 4):
+ print_state(workflow.id)
+ # Now always pass workflow id to lookup function
+ # Observer will fetch old data from DB suing this id and later store new updated state data against this id to DB
+ source_response_list = source.lookup(source_config, id=workflow.id)
+
+ if source_response_list is None or len(source_response_list) == 0:
+ break
+
+ time.sleep(180)
+
+print_state(workflow.id)
diff --git a/obsei_module/obsei-master/example/youtube_scrapper_example.py b/obsei_module/obsei-master/example/youtube_scrapper_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bded3d4884a7f7523eea96e9275e648ebd9ad2f
--- /dev/null
+++ b/obsei_module/obsei-master/example/youtube_scrapper_example.py
@@ -0,0 +1,36 @@
+import logging
+import sys
+
+from obsei.analyzer.classification_analyzer import (
+    ClassificationAnalyzerConfig, ZeroShotClassificationAnalyzer)
+from obsei.source.youtube_scrapper import (YoutubeScrapperConfig,
+                                           YoutubeScrapperSource)
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+# Scrape up to 10 comments (including replies) from one video, looking back 1 year.
+source_config = YoutubeScrapperConfig(
+    video_url="https://www.youtube.com/watch?v=uZfns0JIlFk",
+    fetch_replies=True,
+    max_comments=10,
+    lookup_period="1Y",
+)
+
+source = YoutubeScrapperSource()
+
+# Fetch the comments and log each raw payload.
+source_response_list = source.lookup(source_config)
+for idx, source_response in enumerate(source_response_list):
+    logger.info(f"source_response#'{idx}'='{source_response.__dict__}'")
+
+# Zero-shot classify each comment; "auto" picks GPU when available, else CPU.
+text_analyzer = ZeroShotClassificationAnalyzer(
+    model_name_or_path="typeform/mobilebert-uncased-mnli", device="auto"
+)
+
+# Score every comment against the candidate labels and log the results.
+analyzer_response_list = text_analyzer.analyze_input(
+    source_response_list=source_response_list,
+    analyzer_config=ClassificationAnalyzerConfig(
+        labels=["interesting", "enquiring"],
+    ),
+)
+for idx, an_response in enumerate(analyzer_response_list):
+    logger.info(f"analyzer_response#'{idx}'='{an_response.__dict__}'")
diff --git a/obsei_module/obsei-master/images/Obsei-flow-diagram.png b/obsei_module/obsei-master/images/Obsei-flow-diagram.png
new file mode 100644
index 0000000000000000000000000000000000000000..e25e5fb52066f679c4ee7433cdcb59c19c4b4bd3
Binary files /dev/null and b/obsei_module/obsei-master/images/Obsei-flow-diagram.png differ
diff --git a/obsei_module/obsei-master/images/Obsei-future-concept.png b/obsei_module/obsei-master/images/Obsei-future-concept.png
new file mode 100644
index 0000000000000000000000000000000000000000..84d97eb97c5cc36b2d471789bad8edc23f2ad48d
Binary files /dev/null and b/obsei_module/obsei-master/images/Obsei-future-concept.png differ
diff --git a/obsei_module/obsei-master/images/jira_screenshot.png b/obsei_module/obsei-master/images/jira_screenshot.png
new file mode 100644
index 0000000000000000000000000000000000000000..d14e391487bdf46c5b279dd0f60c6de16302e901
Binary files /dev/null and b/obsei_module/obsei-master/images/jira_screenshot.png differ
diff --git a/obsei_module/obsei-master/images/logos/Slack_join.svg b/obsei_module/obsei-master/images/logos/Slack_join.svg
new file mode 100644
index 0000000000000000000000000000000000000000..1224dfdaba2b72dbfbc3f86a9e9a29aab4dc96b2
--- /dev/null
+++ b/obsei_module/obsei-master/images/logos/Slack_join.svg
@@ -0,0 +1,49 @@
+
+
+
diff --git a/obsei_module/obsei-master/images/logos/appstore.png b/obsei_module/obsei-master/images/logos/appstore.png
new file mode 100644
index 0000000000000000000000000000000000000000..a88786b5cfc6d42af039a0cf0ca15ef0f4ae378f
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/appstore.png differ
diff --git a/obsei_module/obsei-master/images/logos/classification.png b/obsei_module/obsei-master/images/logos/classification.png
new file mode 100644
index 0000000000000000000000000000000000000000..c272cc21dd36d86e3e0fedb387fafe758af59bb1
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/classification.png differ
diff --git a/obsei_module/obsei-master/images/logos/dummy.png b/obsei_module/obsei-master/images/logos/dummy.png
new file mode 100644
index 0000000000000000000000000000000000000000..65a21ae2882d0e26c88e1694425155a81e418507
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/dummy.png differ
diff --git a/obsei_module/obsei-master/images/logos/elastic.png b/obsei_module/obsei-master/images/logos/elastic.png
new file mode 100644
index 0000000000000000000000000000000000000000..47e53a58326b6bf4d5bc8a9a48811c360311a7dd
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/elastic.png differ
diff --git a/obsei_module/obsei-master/images/logos/facebook.png b/obsei_module/obsei-master/images/logos/facebook.png
new file mode 100644
index 0000000000000000000000000000000000000000..b4374d2b0528a0ec9f617a5c7ee33722e98a7d19
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/facebook.png differ
diff --git a/obsei_module/obsei-master/images/logos/gmail.png b/obsei_module/obsei-master/images/logos/gmail.png
new file mode 100644
index 0000000000000000000000000000000000000000..357c439d8d8d7f884a3eeaaf1e1412d73ff5a72c
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/gmail.png differ
diff --git a/obsei_module/obsei-master/images/logos/googlenews.png b/obsei_module/obsei-master/images/logos/googlenews.png
new file mode 100644
index 0000000000000000000000000000000000000000..a7cf45acdb3bb1303a7e2b7ac8f2db9de11da558
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/googlenews.png differ
diff --git a/obsei_module/obsei-master/images/logos/http_api.png b/obsei_module/obsei-master/images/logos/http_api.png
new file mode 100644
index 0000000000000000000000000000000000000000..8bc5c9ae4f47022088572a1753711bf5fd669948
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/http_api.png differ
diff --git a/obsei_module/obsei-master/images/logos/jira.png b/obsei_module/obsei-master/images/logos/jira.png
new file mode 100644
index 0000000000000000000000000000000000000000..d92cf6f845594ace475c07c07cbbcfac0907a660
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/jira.png differ
diff --git a/obsei_module/obsei-master/images/logos/logger.png b/obsei_module/obsei-master/images/logos/logger.png
new file mode 100644
index 0000000000000000000000000000000000000000..34a68ee5a5e73c2b9963ff9284482dcf4effe0f3
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/logger.png differ
diff --git a/obsei_module/obsei-master/images/logos/ner.png b/obsei_module/obsei-master/images/logos/ner.png
new file mode 100644
index 0000000000000000000000000000000000000000..06297eabfb3d5d589943decf931e2ac096ee38d3
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/ner.png differ
diff --git a/obsei_module/obsei-master/images/logos/obsei_200x200.png b/obsei_module/obsei-master/images/logos/obsei_200x200.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb4cb25373412834d3384c0308c8039667111876
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/obsei_200x200.png differ
diff --git a/obsei_module/obsei-master/images/logos/pandas.svg b/obsei_module/obsei-master/images/logos/pandas.svg
new file mode 100644
index 0000000000000000000000000000000000000000..1451f57de198e7283f900a2538212c3ee27458f9
--- /dev/null
+++ b/obsei_module/obsei-master/images/logos/pandas.svg
@@ -0,0 +1,111 @@
+
+
diff --git a/obsei_module/obsei-master/images/logos/pii.png b/obsei_module/obsei-master/images/logos/pii.png
new file mode 100644
index 0000000000000000000000000000000000000000..13a6826f6f8aed02e7b1e89a2a9fac1ff3510481
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/pii.png differ
diff --git a/obsei_module/obsei-master/images/logos/playstore.png b/obsei_module/obsei-master/images/logos/playstore.png
new file mode 100644
index 0000000000000000000000000000000000000000..c054cd04bb47e26fdba2d5b66071a63317182f36
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/playstore.png differ
diff --git a/obsei_module/obsei-master/images/logos/reddit.png b/obsei_module/obsei-master/images/logos/reddit.png
new file mode 100644
index 0000000000000000000000000000000000000000..695eff14557b7ae25f594febd4cd562013fb9c5a
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/reddit.png differ
diff --git a/obsei_module/obsei-master/images/logos/sentiment.png b/obsei_module/obsei-master/images/logos/sentiment.png
new file mode 100644
index 0000000000000000000000000000000000000000..632d49b471815a10ad16e6bdbe0db53a549b6076
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/sentiment.png differ
diff --git a/obsei_module/obsei-master/images/logos/slack.svg b/obsei_module/obsei-master/images/logos/slack.svg
new file mode 100644
index 0000000000000000000000000000000000000000..c37dc5eb49e3ef638f9dd6f4cf9ab345db8c141d
--- /dev/null
+++ b/obsei_module/obsei-master/images/logos/slack.svg
@@ -0,0 +1,33 @@
+
+
+
diff --git a/obsei_module/obsei-master/images/logos/translator.png b/obsei_module/obsei-master/images/logos/translator.png
new file mode 100644
index 0000000000000000000000000000000000000000..2aa0174fa5f132d6474e051e9be503c105f03719
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/translator.png differ
diff --git a/obsei_module/obsei-master/images/logos/twitter.png b/obsei_module/obsei-master/images/logos/twitter.png
new file mode 100644
index 0000000000000000000000000000000000000000..4e72f8cb100d33850ff0141e009afc30cd0c79b0
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/twitter.png differ
diff --git a/obsei_module/obsei-master/images/logos/webcrawler.png b/obsei_module/obsei-master/images/logos/webcrawler.png
new file mode 100644
index 0000000000000000000000000000000000000000..0fd7f3623df18ed0493e08d526167e7cd1e1ee7e
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/webcrawler.png differ
diff --git a/obsei_module/obsei-master/images/logos/zendesk.png b/obsei_module/obsei-master/images/logos/zendesk.png
new file mode 100644
index 0000000000000000000000000000000000000000..78bfb22e62f72e0a034b8e14a8964d9e5c185375
Binary files /dev/null and b/obsei_module/obsei-master/images/logos/zendesk.png differ
diff --git a/obsei_module/obsei-master/images/obsei-flyer.png b/obsei_module/obsei-master/images/obsei-flyer.png
new file mode 100644
index 0000000000000000000000000000000000000000..e94f831d50cceada70f07a5e1f6814201f7fa76b
Binary files /dev/null and b/obsei_module/obsei-master/images/obsei-flyer.png differ
diff --git a/obsei_module/obsei-master/images/obsei-ui-demo.png b/obsei_module/obsei-master/images/obsei-ui-demo.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0ce9bc92418c70a4e5b7210f7ab89024820dc17
Binary files /dev/null and b/obsei_module/obsei-master/images/obsei-ui-demo.png differ
diff --git a/obsei_module/obsei-master/images/obsei_flow.gif b/obsei_module/obsei-master/images/obsei_flow.gif
new file mode 100644
index 0000000000000000000000000000000000000000..a6538b34996b6092ce3c978b1daf8ff9ad4da683
--- /dev/null
+++ b/obsei_module/obsei-master/images/obsei_flow.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bb0b0b15bac52084145aea23f9b47b207853ce9c45d4c355ccadffadc129bb9
+size 6226733
diff --git a/obsei_module/obsei-master/mypy.ini b/obsei_module/obsei-master/mypy.ini
new file mode 100644
index 0000000000000000000000000000000000000000..976ba0294638950e865be3934cbeee3b6305ffd6
--- /dev/null
+++ b/obsei_module/obsei-master/mypy.ini
@@ -0,0 +1,2 @@
+[mypy]
+ignore_missing_imports = True
diff --git a/obsei_module/obsei-master/obsei/__init__.py b/obsei_module/obsei-master/obsei/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..845724ca59968b71689a82b2b48d7bd93f142a0c
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/__init__.py
@@ -0,0 +1,19 @@
+import logging
+
+from obsei._version import __version__
+
+# Configure root logging at package import so examples get timestamped output.
+# NOTE(review): calling basicConfig from a library import mutates the host
+# application's logging setup — confirm this side effect is intended.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    datefmt="%m/%d/%Y %H:%M:%S",
+    level=logging.INFO,
+)
+
+init_logger: logging.Logger = logging.getLogger(__name__)
+
+# Emitted as a warning on every import to point users at the optional extras.
+installation_message: str = """
+By default `pip install obsei` will only install core dependencies.
+To install all required dependencies use `pip install obsei[all]`.
+Refer https://obsei.com/#install-obsei for more options.
+"""
+
+init_logger.warning(installation_message)
diff --git a/obsei_module/obsei-master/obsei/_version.py b/obsei_module/obsei-master/obsei/_version.py
new file mode 100644
index 0000000000000000000000000000000000000000..6561790f155f6bfd436e5b19b2f0a1e7f20c0259
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/_version.py
@@ -0,0 +1 @@
+# Single source of truth for the package version (read by obsei/__init__.py).
+__version__ = "0.0.15"
diff --git a/obsei_module/obsei-master/obsei/analyzer/__init__.py b/obsei_module/obsei-master/obsei/analyzer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/analyzer/base_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/base_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..aae074243dd431a9f48e7e253f627dc07ecaf7f2
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/base_analyzer.py
@@ -0,0 +1,82 @@
+from abc import abstractmethod
+from typing import Any, Generator, List, Optional
+
+from pydantic import Field, PrivateAttr
+from pydantic_settings import BaseSettings
+
+from obsei.misc import gpu_util
+from obsei.payload import TextPayload
+from obsei.postprocessor.inference_aggregator import (
+ InferenceAggregator,
+ InferenceAggregatorConfig,
+)
+from obsei.preprocessor.text_splitter import TextSplitter, TextSplitterConfig
+from obsei.workflow.base_store import BaseStore
+
+# Fallback input-token cap used when a model config lacks max_position_embeddings.
+MAX_LENGTH: int = 510
+# Default inference batch sizes, selected by resolved device in BaseAnalyzer.
+DEFAULT_BATCH_SIZE_GPU: int = 64
+DEFAULT_BATCH_SIZE_CPU: int = 4
+
+
+class BaseAnalyzerConfig(BaseSettings):
+ TYPE: str = "Base"
+ use_splitter_and_aggregator: Optional[bool] = False
+ splitter_config: Optional[TextSplitterConfig] = None
+ aggregator_config: Optional[InferenceAggregatorConfig] = None
+
+ def __init__(self, **data: Any):
+ super().__init__(**data)
+
+ if self.use_splitter_and_aggregator and not self.splitter_config and not self.aggregator_config:
+ raise AttributeError("Need splitter_config and aggregator_config if enabling use_splitter_and_aggregator "
+ "option")
+
+ class Config:
+ arbitrary_types_allowed = True
+
+
+class BaseAnalyzer(BaseSettings):
+ _device_id: int = PrivateAttr()
+ TYPE: str = "Base"
+ store: Optional[BaseStore] = None
+ device: str = "auto"
+ batch_size: int = -1
+ splitter: TextSplitter = Field(default=TextSplitter())
+ aggregator: InferenceAggregator = Field(default=InferenceAggregator())
+
+ """
+ auto: choose gpu if present else use cpu
+ cpu: use cpu
+ cuda:{id} - cuda device id
+ """
+
+ def __init__(self, **data: Any):
+ super().__init__(**data)
+
+ self._device_id = gpu_util.get_device_id(self.device)
+ if self.batch_size < 0:
+ self.batch_size = (
+ DEFAULT_BATCH_SIZE_CPU
+ if self._device_id == 0
+ else DEFAULT_BATCH_SIZE_GPU
+ )
+
+ @abstractmethod
+ def analyze_input(
+ self,
+ source_response_list: List[TextPayload],
+ analyzer_config: Optional[BaseAnalyzerConfig] = None,
+ **kwargs: Any,
+ ) -> List[TextPayload]:
+ pass
+
+ @staticmethod
+ def batchify(
+ payload_list: List[TextPayload],
+ batch_size: int,
+ ) -> Generator[List[TextPayload], None, None]:
+ for index in range(0, len(payload_list), batch_size):
+ yield payload_list[index : index + batch_size]
+
+ class Config:
+ arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/analyzer/classification_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/classification_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f51a48313772ee5883fc1374a114eb977346139
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/classification_analyzer.py
@@ -0,0 +1,204 @@
+import logging
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field, PrivateAttr
+from transformers import Pipeline, pipeline
+
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+ MAX_LENGTH,
+)
+from obsei.payload import TextPayload
+from obsei.postprocessor.inference_aggregator import InferenceAggregatorConfig
+from obsei.postprocessor.inference_aggregator_function import ClassificationAverageScore
+
+logger = logging.getLogger(__name__)
+
+
+class ClassificationAnalyzerConfig(BaseAnalyzerConfig):
+    """Configuration for classification analyzers (plain and zero-shot)."""
+
+    TYPE: str = "Classification"
+    # Candidate labels for zero-shot classification; None for plain models.
+    labels: Optional[List[str]] = None
+    # Optional mapping used to rename model output labels.
+    label_map: Optional[Dict[str, str]] = None
+    multi_class_classification: bool = True
+    add_positive_negative_labels: bool = True
+    aggregator_config: InferenceAggregatorConfig = Field(
+        InferenceAggregatorConfig(aggregate_function=ClassificationAverageScore())
+    )
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+
+        # Without candidate labels the zero-shot-specific flags are meaningless.
+        if self.labels is None:
+            self.multi_class_classification = False
+            self.add_positive_negative_labels = False
+
+
+class TextClassificationAnalyzer(BaseAnalyzer):
+    """Analyzer backed by a HuggingFace `text-classification` pipeline."""
+
+    TYPE: str = "Classification"
+    pipeline_name: str = "text-classification"
+    _pipeline: Pipeline = PrivateAttr()
+    _max_length: int = PrivateAttr()
+    model_name_or_path: str
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+        # Build the transformers pipeline on the device resolved by BaseAnalyzer.
+        self._pipeline = pipeline(
+            self.pipeline_name,
+            model=self.model_name_or_path,
+            device=self._device_id,
+        )
+
+        # Truncate inputs to the model's positional limit when it declares one.
+        if hasattr(self._pipeline.model.config, "max_position_embeddings"):
+            self._max_length = self._pipeline.model.config.max_position_embeddings
+        else:
+            self._max_length = MAX_LENGTH
+
+    def prediction_from_model(
+        self,
+        texts: List[str],
+        analyzer_config: Optional[ClassificationAnalyzerConfig] = None,
+    ) -> List[Dict[str, Any]]:
+        """Return one {label: score} dict per text, applying any label_map."""
+        prediction = self._pipeline(texts)
+        # The pipeline returns a single dict for single input; normalize to list.
+        predictions = prediction if isinstance(prediction, list) else [prediction]
+        label_map = analyzer_config.label_map if analyzer_config is not None else {}
+        label_map = label_map or {}
+        return [
+            {
+                label_map.get(prediction["label"], prediction["label"]): prediction["score"]
+            } for prediction in predictions
+        ]
+
+    def analyze_input(  # type: ignore[override]
+        self,
+        source_response_list: List[TextPayload],
+        analyzer_config: Optional[ClassificationAnalyzerConfig] = None,
+        **kwargs: Any,
+    ) -> List[TextPayload]:
+        """Classify payloads in batches, optionally splitting then aggregating."""
+        analyzer_output: List[TextPayload] = []
+
+        # Optionally split long texts into smaller segments before inference.
+        if (
+            analyzer_config is not None
+            and analyzer_config.use_splitter_and_aggregator
+            and analyzer_config.splitter_config
+        ):
+            source_response_list = self.splitter.preprocess_input(
+                source_response_list,
+                config=analyzer_config.splitter_config,
+            )
+
+        for batch_responses in self.batchify(source_response_list, self.batch_size):
+            texts = [
+                source_response.processed_text[: self._max_length]
+                for source_response in batch_responses
+            ]
+
+            batch_predictions = self.prediction_from_model(texts=texts, analyzer_config=analyzer_config)
+
+            for score_dict, source_response in zip(batch_predictions, batch_responses):
+                segmented_data = {
+                    "classifier_data": score_dict
+                }
+
+                # Preserve any segmentation info already attached to the payload.
+                if source_response.segmented_data:
+                    segmented_data = {
+                        **segmented_data,
+                        **source_response.segmented_data,
+                    }
+
+                analyzer_output.append(
+                    TextPayload(
+                        processed_text=source_response.processed_text,
+                        meta=source_response.meta,
+                        segmented_data=segmented_data,
+                        source_name=source_response.source_name,
+                    )
+                )
+
+        # Re-aggregate split segments back into per-document results.
+        if (
+            analyzer_config is not None
+            and analyzer_config.use_splitter_and_aggregator
+            and analyzer_config.aggregator_config
+        ):
+            analyzer_output = self.aggregator.postprocess_input(
+                input_list=analyzer_output,
+                config=analyzer_config.aggregator_config,
+            )
+
+        return analyzer_output
+
+
+class ZeroShotClassificationAnalyzer(TextClassificationAnalyzer):
+ pipeline_name: str = "zero-shot-classification"
+
+ def prediction_from_model(
+ self,
+ texts: List[str],
+ analyzer_config: Optional[ClassificationAnalyzerConfig] = None,
+ ) -> List[Dict[str, Any]]:
+ if analyzer_config is None:
+ raise ValueError("analyzer_config can't be None")
+
+ labels = analyzer_config.labels or []
+ if analyzer_config.add_positive_negative_labels:
+ if "positive" not in labels:
+ labels.append("positive")
+ if "negative" not in labels:
+ labels.append("negative")
+
+ if len(labels) == 0:
+ raise ValueError("`labels` can't be empty or `add_positive_negative_labels` should be False")
+
+ prediction = self._pipeline(
+ texts, candidate_labels=labels, multi_label=analyzer_config.multi_class_classification
+ )
+ predictions = prediction if isinstance(prediction, list) else [prediction]
+
+ return [dict(zip(prediction["labels"], prediction["scores"])) for prediction in predictions]
+
+ def analyze_input( # type: ignore[override]
+ self,
+ source_response_list: List[TextPayload],
+ analyzer_config: Optional[ClassificationAnalyzerConfig] = None,
+ **kwargs: Any,
+ ) -> List[TextPayload]:
+ if analyzer_config is None:
+ raise ValueError("analyzer_config can't be None")
+
+ return super().analyze_input(
+ source_response_list=source_response_list,
+ analyzer_config=analyzer_config,
+ **kwargs
+ )
+
+
+# Step 1: Define your configuration (labels, etc.)
+analyzer_config = ClassificationAnalyzerConfig(
+ labels=["Sports", "Politics", "Technology", "Entertainment"], # Example labels
+ multi_class_classification=False,
+ add_positive_negative_labels=False
+)
+
+# Step 2: Initialize the ZeroShotClassificationAnalyzer
+analyzer = ZeroShotClassificationAnalyzer(
+ model_name_or_path="facebook/bart-large-mnli", # Using a pre-trained zero-shot classification model
+ device=-1 # Assuming you have a GPU, use device=-1 for CPU
+)
+
+# Step 3: Prepare the input text (as TextPayload objects)
+texts = [
+ "The new iPhone has been released and it's taking the tech world by storm.",
+ "The latest political debate had strong views on the economy.",
+ "The football match between Barcelona and Madrid ended in a draw."
+]
+
+# Create TextPayloads from the texts
+source_responses = [TextPayload(processed_text=text) for text in texts]
+
+# Step 4: Run the analysis
+results = analyzer.analyze_input(source_response_list=source_responses, analyzer_config=analyzer_config)
+
+# Step 5: Output the results
+for result in results:
+ print(f"Text: {result.processed_text}")
+ print(f"Classification Scores: {result.segmented_data['classifier_data']}")
\ No newline at end of file
diff --git a/obsei_module/obsei-master/obsei/analyzer/dummy_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/dummy_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..1119107061ce80db049c18acc7a43c4ef34bf09d
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/dummy_analyzer.py
@@ -0,0 +1,46 @@
+from typing import Any, List, Optional
+
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+)
+from obsei.payload import TextPayload
+
+
+class DummyAnalyzerConfig(BaseAnalyzerConfig):
+ TYPE: str = "Dummy"
+ dummy_data: Optional[Any] = None
+
+ def __init__(self, **data: Any):
+ super().__init__(**data)
+
+
+class DummyAnalyzer(BaseAnalyzer):
+ def analyze_input( # type: ignore[override]
+ self,
+ source_response_list: List[TextPayload],
+ analyzer_config: Optional[DummyAnalyzerConfig] = None,
+ **kwargs: Any,
+ ) -> List[TextPayload]:
+ responses = []
+ for source_response in source_response_list:
+
+ segmented_data = {
+ "dummy_data": None
+ if not analyzer_config
+ else analyzer_config.dummy_data
+ }
+
+ if source_response.segmented_data:
+ segmented_data = {**segmented_data, **source_response.segmented_data}
+
+ responses.append(
+ TextPayload(
+ processed_text=source_response.processed_text,
+ meta=source_response.meta,
+ source_name=source_response.source_name,
+ segmented_data=segmented_data,
+ )
+ )
+
+ return responses
diff --git a/obsei_module/obsei-master/obsei/analyzer/ner_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/ner_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..847f722cbfe0c5fcf835286d08df4d106de1849e
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/ner_analyzer.py
@@ -0,0 +1,165 @@
+import logging
+from typing import Any, Dict, Generator, List, Optional, Tuple, Iterator
+from pydantic import PrivateAttr
+from transformers import (
+ AutoModelForTokenClassification,
+ AutoTokenizer,
+ Pipeline,
+ pipeline,
+)
+import spacy
+from spacy.language import Language
+from spacy.tokens.doc import Doc
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+ MAX_LENGTH,
+)
+from obsei.payload import TextPayload
+
+logger = logging.getLogger(__name__)
+
+
+class TransformersNERAnalyzer(BaseAnalyzer):
+    """NER analyzer backed by a HuggingFace token-classification pipeline."""
+
+    _pipeline: Pipeline = PrivateAttr()
+    _max_length: int = PrivateAttr()
+    TYPE: str = "NER"
+    model_name_or_path: str
+    tokenizer_name: Optional[str] = None
+    grouped_entities: Optional[bool] = True
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+
+        model = AutoModelForTokenClassification.from_pretrained(self.model_name_or_path)
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.tokenizer_name if self.tokenizer_name else self.model_name_or_path,
+            use_fast=True,
+        )
+
+        # NOTE(review): `grouped_entities` is deprecated in newer transformers
+        # releases in favour of `aggregation_strategy` — confirm pinned version.
+        self._pipeline = pipeline(
+            "ner",
+            model=model,
+            tokenizer=tokenizer,
+            grouped_entities=self.grouped_entities,
+            device=self._device_id,
+        )
+
+        # Truncate inputs to the model's positional limit when it declares one.
+        if hasattr(self._pipeline.model.config, "max_position_embeddings"):
+            self._max_length = self._pipeline.model.config.max_position_embeddings
+        else:
+            self._max_length = MAX_LENGTH
+
+    def _prediction_from_model(self, texts: List[str]) -> List[List[Dict[str, float]]]:
+        """Run the pipeline and normalize output to one list per input text."""
+        prediction = self._pipeline(texts)
+        return (  # type: ignore[no-any-return]
+            prediction
+            if len(prediction) and isinstance(prediction[0], list)
+            else [prediction]
+        )
+
+    def analyze_input(
+        self,
+        source_response_list: List[TextPayload],
+        analyzer_config: Optional[BaseAnalyzerConfig] = None,
+        **kwargs: Any,
+    ) -> List[TextPayload]:
+        """Attach `ner_data` entity predictions to each payload."""
+        analyzer_output: List[TextPayload] = []
+
+        for batch_responses in self.batchify(source_response_list, self.batch_size):
+            texts = [
+                source_response.processed_text[: self._max_length]
+                for source_response in batch_responses
+            ]
+            batch_predictions = self._prediction_from_model(texts)
+
+            for prediction, source_response in zip(batch_predictions, batch_responses):
+                segmented_data = {"ner_data": prediction}
+                # Preserve any segmentation info already attached to the payload.
+                if source_response.segmented_data:
+                    segmented_data = {
+                        **segmented_data,
+                        **source_response.segmented_data,
+                    }
+
+                analyzer_output.append(
+                    TextPayload(
+                        processed_text=source_response.processed_text,
+                        meta=source_response.meta,
+                        segmented_data=segmented_data,
+                        source_name=source_response.source_name,
+                    )
+                )
+        return analyzer_output
+
+
+class SpacyNERAnalyzer(BaseAnalyzer):
+    """NER analyzer backed by a spaCy pipeline (non-NER components disabled)."""
+
+    _nlp: Language = PrivateAttr()
+    TYPE: str = "NER"
+    model_name_or_path: str
+    # NOTE(review): `tokenizer_name` and `grouped_entities` are accepted here
+    # but never used by this spaCy implementation — confirm before relying.
+    tokenizer_name: Optional[str] = None
+    grouped_entities: Optional[bool] = True
+    # Number of OS processes spaCy uses inside nlp.pipe.
+    n_process: int = 1
+
+    def __init__(self, **data: Any):
+        super().__init__(**data)
+        # Only entities are needed; disable the rest of the pipeline for speed.
+        self._nlp = spacy.load(
+            self.model_name_or_path,
+            disable=["tagger", "parser", "attribute_ruler", "lemmatizer"],
+        )
+
+    def _spacy_pipe_batchify(
+        self,
+        texts: List[str],
+        batch_size: int,
+        source_response_list: List[TextPayload],
+    ) -> Generator[Tuple[Iterator[Doc], List[TextPayload]], None, None]:
+        """Yield aligned (docs, payloads) pairs, one batch at a time."""
+        for index in range(0, len(texts), batch_size):
+            yield (
+                self._nlp.pipe(
+                    texts=texts[index: index + batch_size],
+                    batch_size=batch_size,
+                    n_process=self.n_process,
+                ),
+                source_response_list[index: index + batch_size],
+            )
+
+    def analyze_input(
+        self,
+        source_response_list: List[TextPayload],
+        analyzer_config: Optional[BaseAnalyzerConfig] = None,
+        **kwargs: Any,
+    ) -> List[TextPayload]:
+        """Attach `ner_data` (entity_group/word/start/end) to each payload."""
+        analyzer_output: List[TextPayload] = []
+        texts = [
+            source_response.processed_text for source_response in source_response_list
+        ]
+
+        for batch_docs, batch_source_response in self._spacy_pipe_batchify(
+            texts, self.batch_size, source_response_list
+        ):
+            for doc, source_response in zip(batch_docs, batch_source_response):
+                # Convert spaCy entity spans to the transformers-like schema.
+                ner_prediction = [
+                    {
+                        "entity_group": ent.label_,
+                        "word": ent.text,
+                        "start": ent.start_char,
+                        "end": ent.end_char,
+                    }
+                    for ent in doc.ents
+                ]
+                segmented_data = {"ner_data": ner_prediction}
+                # Preserve any segmentation info already attached to the payload.
+                if source_response.segmented_data:
+                    segmented_data = {
+                        **segmented_data,
+                        **source_response.segmented_data,
+                    }
+                analyzer_output.append(
+                    TextPayload(
+                        processed_text=source_response.processed_text,
+                        meta=source_response.meta,
+                        segmented_data=segmented_data,
+                        source_name=source_response.source_name,
+                    )
+                )
+
+        return analyzer_output
diff --git a/obsei_module/obsei-master/obsei/analyzer/pii_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/pii_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3826c7b1491a938c4399d55f1f74dc00c9e2475
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/pii_analyzer.py
@@ -0,0 +1,191 @@
+import logging
+from typing import Any, Dict, List, Optional
+
+from presidio_analyzer import AnalyzerEngine, EntityRecognizer
+from presidio_anonymizer import AnonymizerEngine
+from presidio_analyzer.nlp_engine import NlpEngineProvider
+from presidio_anonymizer.entities.engine import OperatorConfig
+from pydantic import BaseModel, Field, PrivateAttr
+
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+)
+from obsei.payload import TextPayload
+
+logger = logging.getLogger(__name__)
+
+
class PresidioModelConfig(BaseModel):
    """spaCy model selection for one Presidio-supported language."""

    # Language code the model handles (Presidio's default flow is English).
    lang_code: Optional[str] = Field("en")
    # Name of the installed spaCy pipeline package to load.
    model_name: Optional[str] = Field("en_core_web_lg")
+
+
class PresidioEngineConfig(BaseModel):
    """NLP engine configuration for Presidio: engine name plus per-language models."""

    nlp_engine_name: Optional[str] = Field("spacy")
    models: Optional[List[PresidioModelConfig]] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Fall back to a single default (English spaCy) model configuration.
        if not self.models:
            self.models = [PresidioModelConfig()]
+
+
class PresidioAnonymizerConfig(OperatorConfig, BaseModel):  # type: ignore
    """Pydantic-compatible wrapper around Presidio's ``OperatorConfig``."""

    def __init__(self, anonymizer_name: str, params: Optional[Dict[str, Any]] = None):
        # OperatorConfig is not a pydantic model; forward the arguments directly.
        super().__init__(anonymizer_name=anonymizer_name, params=params)

    class Config:
        arbitrary_types_allowed = True
+
+
class PresidioPIIAnalyzerConfig(BaseAnalyzerConfig):
    """Per-request options for the Presidio PII analyzer."""

    TYPE: str = "PresidioPII"
    # Per-entity anonymization operators; see https://microsoft.github.io/presidio/anonymizer/
    anonymizers_config: Optional[Dict[str, PresidioAnonymizerConfig]] = None
    # Entities to search for; see https://microsoft.github.io/presidio/supported_entities/
    # By default (None) all supported entities are searched.
    entities: Optional[List[str]] = None
    # When True, only detect PII — skip the anonymization pass entirely.
    analyze_only: Optional[bool] = False
    # When True, the output payload's text is the anonymized text.
    replace_original_text: Optional[bool] = True
    # Whether the analysis decision-process steps are returned in the response.
    return_decision_process: Optional[bool] = False
+
+
class PresidioPIIAnalyzer(BaseAnalyzer):
    """Analyzer that detects — and optionally anonymizes — PII via Microsoft Presidio.

    The analyzer engine performs entity detection; the anonymizer engine
    rewrites detected spans according to the configured operators.
    """

    _analyzer: AnalyzerEngine = PrivateAttr()
    _anonymizer: AnonymizerEngine = PrivateAttr()
    TYPE: str = "PresidioPII"
    engine_config: Optional[PresidioEngineConfig] = None
    # To see list of supported entities refer https://microsoft.github.io/presidio/supported_entities/
    # To add customer recognizers refer https://microsoft.github.io/presidio/analyzer/adding_recognizers/
    entity_recognizers: Optional[List[EntityRecognizer]] = None
    # To find more details refer https://microsoft.github.io/presidio/anonymizer/
    anonymizers_config: Optional[Dict[str, OperatorConfig]] = None

    def __init__(self, **data: Any):
        """Build the Presidio analyzer/anonymizer engines, downloading spaCy
        models on demand when the spacy NLP engine is configured."""
        super().__init__(**data)

        if not self.engine_config:
            self.engine_config = PresidioEngineConfig()

        if not self.engine_config.models or len(self.engine_config.models) == 0:
            self.engine_config.models = [PresidioModelConfig()]

        # Collect supported languages and, for the spacy engine, ensure each
        # configured model is locally available (downloading it if needed).
        languages = []
        for model_config in self.engine_config.models:
            languages.append(model_config.lang_code)

            # Check SpacyNlpEngine.engine_name
            if (
                self.engine_config.nlp_engine_name == "spacy"
                and model_config.model_name is not None
            ):
                try:
                    spacy_model = __import__(model_config.model_name)
                    spacy_model.load()
                    logger.info(
                        f"Spacy model {model_config.model_name} is already downloaded"
                    )
                # Fix: was a bare `except:`, which would also trap
                # KeyboardInterrupt/SystemExit while probing for the model.
                except Exception:
                    logger.warning(
                        f"Spacy model {model_config.model_name} is not downloaded"
                    )
                    logger.warning(
                        f"Downloading spacy model {model_config.model_name}, it might take some time"
                    )
                    from spacy.cli import download  # type: ignore

                    download(model_config.model_name)

        # Create NLP engine based on configuration
        provider = NlpEngineProvider(nlp_configuration=self.engine_config.dict())
        nlp_engine = provider.create_engine()

        # Pass the created NLP engine and supported_languages to the AnalyzerEngine
        self._analyzer = AnalyzerEngine(
            nlp_engine=nlp_engine, supported_languages=languages
        )

        # self._analyzer.registry.load_predefined_recognizers()
        if self.entity_recognizers:
            for entity_recognizer in self.entity_recognizers:
                self._analyzer.registry.add_recognizer(entity_recognizer)

        # Initialize the anonymizer with logger
        self._anonymizer = AnonymizerEngine()

    def analyze_input(  # type: ignore[override]
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[PresidioPIIAnalyzerConfig] = None,
        language: Optional[str] = "en",
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Detect PII in every payload and optionally anonymize the text.

        Args:
            source_response_list: Payloads to inspect.
            analyzer_config: Presidio options (entities, anonymizers,
                analyze-only mode). Required.
            language: Language of the input texts.

        Returns:
            New payloads whose ``segmented_data["pii_data"]`` holds analyzer
            and anonymizer results; ``processed_text`` is the anonymized text
            when ``replace_original_text`` is enabled.

        Raises:
            ValueError: if ``analyzer_config`` is None.
        """
        if analyzer_config is None:
            raise ValueError("analyzer_config can't be None")

        analyzer_output: List[TextPayload] = []

        for batch_responses in self.batchify(source_response_list, self.batch_size):
            for source_response in batch_responses:
                analyzer_result = self._analyzer.analyze(
                    text=source_response.processed_text,
                    entities=analyzer_config.entities,
                    return_decision_process=analyzer_config.return_decision_process,
                    language=language,
                )

                anonymized_result = None
                if not analyzer_config.analyze_only:
                    # Per-request config wins over the analyzer-level default.
                    anonymizers_config = (
                        analyzer_config.anonymizers_config or self.anonymizers_config
                    )

                    if (
                        source_response.processed_text is not None
                        and len(source_response.processed_text) > 0
                    ):
                        anonymized_result = self._anonymizer.anonymize(
                            text=source_response.processed_text,
                            operators=anonymizers_config,
                            analyzer_results=analyzer_result,
                        )

                if (
                    analyzer_config.replace_original_text
                    and anonymized_result is not None
                ):
                    text = anonymized_result.text
                else:
                    text = source_response.processed_text

                segmented_data = {
                    "pii_data": {
                        "analyzer_result": [vars(result) for result in analyzer_result],
                        "anonymized_result": None
                        if not anonymized_result
                        else [vars(item) for item in anonymized_result.items],
                        "anonymized_text": None
                        if not anonymized_result
                        else anonymized_result.text,
                    }
                }
                # Preserve upstream segmented data (it wins on key collisions).
                if source_response.segmented_data:
                    segmented_data = {
                        **segmented_data,
                        **source_response.segmented_data,
                    }

                analyzer_output.append(
                    TextPayload(
                        processed_text=text,
                        meta=source_response.meta,
                        segmented_data=segmented_data,
                        source_name=source_response.source_name,
                    )
                )

        return analyzer_output
diff --git a/obsei_module/obsei-master/obsei/analyzer/sentiment_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/sentiment_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b03baa8b27721ef4d1fa65bd05675088bb59c68e
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/sentiment_analyzer.py
@@ -0,0 +1,94 @@
+import logging
+from typing import Any, List, Optional
+
+from pydantic import PrivateAttr
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+)
+from obsei.payload import TextPayload
+from obsei.analyzer.classification_analyzer import (
+ ClassificationAnalyzerConfig,
+ ZeroShotClassificationAnalyzer,
+)
+
+logger = logging.getLogger(__name__)
+
+
class VaderSentimentAnalyzer(BaseAnalyzer):
    """Sentiment analyzer backed by VADER's rule-based polarity scorer."""

    _model: SentimentIntensityAnalyzer = PrivateAttr()
    TYPE: str = "Sentiment"

    def __init__(self, **data: Any):
        super().__init__(**data)
        self._model = SentimentIntensityAnalyzer()

    def _get_sentiment_score_from_vader(self, text: str) -> float:
        """Return VADER's compound polarity score for ``text`` (range [-1, 1])."""
        return float(self._model.polarity_scores(text)["compound"])

    def analyze_input(
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[BaseAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Score each payload and attach complementary positive/negative weights
        under ``segmented_data["classifier_data"]``."""
        outputs: List[TextPayload] = []

        for batch in self.batchify(source_response_list, self.batch_size):
            for payload in batch:
                compound = self._get_sentiment_score_from_vader(
                    payload.processed_text
                )
                # Map the signed compound score onto two weights summing to 1.
                if compound < 0.0:
                    scores = {"negative": -compound}
                    scores["positive"] = 1.0 - scores["negative"]
                else:
                    scores = {"positive": compound}
                    scores["negative"] = 1.0 - scores["positive"]

                merged = {"classifier_data": scores}
                if payload.segmented_data:
                    merged.update(payload.segmented_data)

                outputs.append(
                    TextPayload(
                        processed_text=payload.processed_text,
                        meta=payload.meta,
                        segmented_data=merged,
                        source_name=payload.source_name,
                    )
                )

        return outputs
+
+
class TransformersSentimentAnalyzerConfig(ClassificationAnalyzerConfig):
    """Classification config preset for binary sentiment analysis."""

    TYPE: str = "Sentiment"
    # Fixed label set used for zero-shot sentiment scoring.
    labels: List[str] = ["positive", "negative"]
    # Sentiment is single-label: positive/negative scores are complementary.
    multi_class_classification: bool = False
+
+
class TransformersSentimentAnalyzer(ZeroShotClassificationAnalyzer):
    """Zero-shot classifier specialized to positive/negative sentiment labels."""

    def analyze_input(  # type: ignore[override]
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[TransformersSentimentAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Delegate to zero-shot classification, forcing positive/negative labels on."""
        return super().analyze_input(
            source_response_list=source_response_list,
            analyzer_config=analyzer_config,
            add_positive_negative_labels=True,
            **kwargs,
        )
diff --git a/obsei_module/obsei-master/obsei/analyzer/test2.py b/obsei_module/obsei-master/obsei/analyzer/test2.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9ba2cc168ce0da9def9b0c15f3018c467d19843
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/test2.py
@@ -0,0 +1,34 @@
"""Ad-hoc demo script: zero-shot topic classification over three sample texts."""
from obsei.payload import TextPayload
from transformers import pipeline  # NOTE(review): unused in this script
from obsei.analyzer.classification_analyzer import ZeroShotClassificationAnalyzer, ClassificationAnalyzerConfig

# Step 1: Define your configuration (labels, etc.)
analyzer_config = ClassificationAnalyzerConfig(
    labels=["Sports", "Politics", "Technology", "Entertainment"],  # Example labels
    multi_class_classification=False,
    add_positive_negative_labels=False
)

# Step 2: Initialize the ZeroShotClassificationAnalyzer
analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="facebook/bart-large-mnli",  # Using a pre-trained zero-shot classification model
    device="cpu"  # Runs on CPU; pass "auto" or "cuda:<n>" to use a GPU
)

# Step 3: Prepare the input text (as TextPayload objects)
texts = [
    "The new iPhone has been released and it's taking the tech world by storm.",
    "The latest political debate had strong views on the economy.",
    "The football match between Barcelona and Madrid ended in a draw."
]

# Create TextPayloads from the texts
source_responses = [TextPayload(processed_text=text) for text in texts]

# Step 4: Run the analysis
results = analyzer.analyze_input(source_response_list=source_responses, analyzer_config=analyzer_config)

# Step 5: Output the results
for result in results:
    print(f"Text: {result.processed_text}")
    print(f"Classification Scores: {result.segmented_data['classifier_data']}")
diff --git a/obsei_module/obsei-master/obsei/analyzer/translation_analyzer.py b/obsei_module/obsei-master/obsei/analyzer/translation_analyzer.py
new file mode 100644
index 0000000000000000000000000000000000000000..45d71e9643b1fc5d14c2437645e84f10244d8a1a
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/analyzer/translation_analyzer.py
@@ -0,0 +1,70 @@
+from typing import Any, List, Optional
+
+from pydantic import PrivateAttr
+from transformers import pipeline, Pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+
+from obsei.analyzer.base_analyzer import (
+ BaseAnalyzer,
+ BaseAnalyzerConfig,
+ MAX_LENGTH,
+)
+from obsei.payload import TextPayload
+
+
class TranslationAnalyzer(BaseAnalyzer):
    """Analyzer that translates payload text with a HuggingFace seq2seq model."""

    _pipeline: Pipeline = PrivateAttr()
    _max_length: int = PrivateAttr()
    TYPE: str = "Translation"
    model_name_or_path: str

    def __init__(self, **data: Any):
        super().__init__(**data)
        tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name_or_path)
        self._pipeline = pipeline(
            "translation", model=model, tokenizer=tokenizer, device=self._device_id
        )
        # Prefer the model's own positional limit; fall back to the project default.
        model_config = self._pipeline.model.config
        self._max_length = getattr(
            model_config, "max_position_embeddings", MAX_LENGTH
        )

    def analyze_input(
        self,
        source_response_list: List[TextPayload],
        analyzer_config: Optional[BaseAnalyzerConfig] = None,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Translate each payload, keeping the original text under
        ``segmented_data["translation_data"]["original_text"]``."""
        results = []

        for batch in self.batchify(source_response_list, self.batch_size):
            # NOTE(review): truncation is by characters, not tokens — confirm
            # this is the intended interpretation of max_position_embeddings.
            truncated_texts = [
                payload.processed_text[: self._max_length] for payload in batch
            ]

            predictions = self._pipeline(truncated_texts)

            for prediction, payload in zip(predictions, batch):
                merged = {
                    "translation_data": {
                        "original_text": payload.processed_text
                    }
                }
                if payload.segmented_data:
                    merged.update(payload.segmented_data)

                results.append(
                    TextPayload(
                        processed_text=prediction["translation_text"],
                        meta=payload.meta,
                        segmented_data=merged,
                        source_name=payload.source_name,
                    )
                )

        return results
diff --git a/obsei_module/obsei-master/obsei/configuration.py b/obsei_module/obsei-master/obsei/configuration.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd55fb90e4fcff3236f7e6760e5affdd1974b936
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/configuration.py
@@ -0,0 +1,37 @@
+import logging
+from typing import Any, Dict, Optional, Union
+
+import yaml
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+from obsei.misc.utils import dict_to_object
+
+logger = logging.getLogger(__name__)
+
+
class ObseiConfiguration(BaseSettings):
    """Load an Obsei workflow configuration from an explicit dict or a YAML file.

    When ``configuration`` is not supplied, the YAML file at
    ``{config_path}/{config_filename}`` is read (paths may also come from the
    ``obsei_config_path`` / ``obsei_config_filename`` environment variables).
    """

    configuration: Optional[Dict[str, Any]] = None
    config_path: Optional[str] = Field(None, env="obsei_config_path")
    config_filename: Optional[str] = Field(None, env="obsei_config_filename")

    def __init__(self, **data: Any):
        super().__init__(**data)

        if self.configuration is None:
            # Fix: the file handle was previously opened without a context
            # manager and never closed.
            # NOTE: FullLoader can construct arbitrary Python-tagged objects;
            # only load trusted configuration files.
            with open(f"{self.config_path}/{self.config_filename}", "r") as config_file:
                self.configuration = yaml.load(config_file, Loader=yaml.FullLoader)
            logger.debug(f"Configuration: {self.configuration}")

    def initialize_instance(self, key_name: Optional[str] = None) -> Union[Any]:
        """Instantiate the object configured under ``key_name``.

        Returns None (with a warning) when the key is missing or empty.
        """
        if (
            key_name is None
            or self.configuration is None
            or key_name not in self.configuration
            or not self.configuration[key_name]
        ):
            logger.warning(f"{key_name} not exist in configuration")
            return None
        return dict_to_object(self.configuration[key_name])
diff --git a/obsei_module/obsei-master/obsei/misc/__init__.py b/obsei_module/obsei-master/obsei/misc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/misc/gpu_util.py b/obsei_module/obsei-master/obsei/misc/gpu_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc442f1d8fbee20066ffaf520ecfb26964636e23
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/misc/gpu_util.py
@@ -0,0 +1,18 @@
+import torch
+
+
def is_gpu_available() -> bool:
    """Return True when at least one CUDA device is visible to torch."""
    return bool(torch.cuda.is_available())
+
+
def get_device_id(device: str) -> int:
    """Map a device spec string to a numeric device id.

    Args:
        device: ``"cpu"``, ``"auto"`` (first GPU when available, else CPU),
            or ``"cuda:<index>"``.

    Returns:
        -1 for CPU, otherwise the zero-based CUDA device index.

    Raises:
        ValueError: for any unsupported device string. (Fix: previously a
            generic ``Exception`` was raised; ``ValueError`` is more precise
            and remains caught by existing ``except Exception`` callers.)
    """
    if device == "cpu":
        return -1
    if device == "auto":
        return 0 if is_gpu_available() else -1
    if device.startswith("cuda:"):
        suffix = device[len("cuda:"):]
        if suffix.isnumeric():
            return int(suffix)

    raise ValueError(f"Invalid device: '{device}'")
diff --git a/obsei_module/obsei-master/obsei/misc/utils.py b/obsei_module/obsei-master/obsei/misc/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e192578ed957b5946cc3f3f1397041040fc8b054
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/misc/utils.py
@@ -0,0 +1,210 @@
+import json
+import math
+import time
+
+import dateparser
+from datetime import datetime, timezone
+from importlib import import_module
+from typing import Any, Dict, Optional, Union
+
+from bs4 import BeautifulSoup
+from bs4.element import Comment
+from dateutil.relativedelta import relativedelta
+
+DATETIME_STRING_PATTERN = "%Y-%m-%dT%H:%M:%SZ"
+DEFAULT_LOOKUP_PERIOD = "1h"
+
+
+# Used from https://stackoverflow.com/a/52081812 and modified
def flatten_dict(
    dictionary: Dict[str, Any],
    round_the_float: bool = True,
    float_round_format_str: str = ".2f",
    separator: str = "_",
) -> Dict[str, Any]:
    """Flatten nested dicts (and lists of dicts) into a single-level dict.

    Nested keys are joined with ``separator``. Float values are formatted with
    ``float_round_format_str`` (yielding strings) when ``round_the_float`` is
    set.
    """
    out: Dict[str, Any] = {}
    for key, val in dictionary.items():
        if isinstance(val, dict):
            val = [val]
        if isinstance(val, list):
            for sub_dict in val:
                # Bug fix: the recursive call previously dropped the caller's
                # formatting options and separator, silently using defaults.
                deeper = flatten_dict(
                    sub_dict,
                    round_the_float=round_the_float,
                    float_round_format_str=float_round_format_str,
                    separator=separator,
                ).items()
                out.update({key + separator + key2: val2 for key2, val2 in deeper})
        elif isinstance(val, float) and round_the_float:
            out[key] = format(val, float_round_format_str)
        else:
            out[key] = val
    return out
+
+
def obj_to_json(obj: Any, sort_keys: bool = False, indent: Optional[int] = None) -> Union[bytes, None]:
    """Serialize ``obj`` to UTF-8 encoded JSON bytes; ``None`` passes through."""
    if obj is None:
        return None
    serialized = json.dumps(
        obj,
        default=datetime_handler,
        ensure_ascii=False,
        sort_keys=sort_keys,
        indent=indent,
    )
    return serialized.encode("utf8")
+
+
def obj_to_markdown(
    obj: Any,
    level: int = 1,
    str_enclose_start: Optional[str] = None,
    str_enclose_end: Optional[str] = None,
) -> str:
    """Render a nested object/dict/list as a markdown-style outline.

    Each nesting level adds one ``*`` to the heading prefix. Dict-like objects
    contribute ``"<prefix> <key>"`` headings; list items get bare prefixes.
    String values are optionally wrapped between ``str_enclose_start`` and
    ``str_enclose_end`` (e.g. code fences).
    """
    key_prefix = "*" * level

    markdowns = []
    if is_collection(obj):
        add_key = True
        if hasattr(obj, "__dict__"):
            item_view = obj.__dict__.items()
        elif isinstance(obj, dict):
            item_view = obj.items()
        else:
            # Lists have no keys; enumerate only drives the loop.
            add_key = False
            item_view = enumerate(obj)

        for key, val in item_view:
            if add_key:
                header = f"{key_prefix} {key}"
            else:
                header = key_prefix
            if is_collection(val):
                # Recurse one level deeper, propagating the enclosing markers.
                child_markdown = obj_to_markdown(
                    obj=val,
                    level=level + 1,
                    str_enclose_start=str_enclose_start,
                    str_enclose_end=str_enclose_end,
                )
                markdowns.append(f"{header}\n{child_markdown}")
            elif str_enclose_start is not None and isinstance(val, str):
                markdowns.append(
                    f"{header}:\n{str_enclose_start}{val}{str_enclose_end}"
                )
            else:
                markdowns.append(f"{header}: {val}")
    elif str_enclose_start is not None and isinstance(obj, str):
        markdowns.append(f"{key_prefix}:\n{str_enclose_start}{obj}{str_enclose_end}")
    else:
        markdowns.append(f"{key_prefix}: {obj}")

    return "\n".join(markdowns)
+
+
def is_collection(obj: Any) -> bool:
    """Return True for dicts, lists, and objects exposing ``__dict__``."""
    if isinstance(obj, (dict, list)):
        return True
    return hasattr(obj, "__dict__")
+
+
+# Copied from searchtweets-v2 and bit modified
# Copied from searchtweets-v2 and bit modified
def convert_utc_time(datetime_str: str) -> datetime:
    """
    Handles datetime argument conversion to the Labs API format, which is
    `YYYY-MM-DDTHH:mm:ssZ`.
    Flexible passing of date formats in the following types::

        - YYYYmmDDHHMM
        - YYYY-mm-DD
        - YYYY-mm-DD HH:MM
        - YYYY-mm-DDTHH:MM
        - 2m (set start_time to two months ago)
        - 3d (set start_time to three days ago)
        - 12h (set start_time to twelve hours ago)
        - 15m (set start_time to fifteen minutes ago)

    Args:
        datetime_str (str): valid formats are listed above.

    Returns:
        timezone-aware UTC datetime.
    """
    try:
        if len(datetime_str) <= 5:
            # Relative offsets like "3d"/"12h" are anchored at the current
            # UTC time. Fix: datetime.utcnow() is deprecated — derive the
            # same naive UTC value from an aware "now".
            _date = datetime.now(timezone.utc).replace(tzinfo=None)
            # parse out numeric character.
            num = int(datetime_str[:-1])
            if "d" in datetime_str:
                _date = _date + relativedelta(days=-num)
            elif "h" in datetime_str:
                _date = _date + relativedelta(hours=-num)
            elif "m" in datetime_str:
                _date = _date + relativedelta(minutes=-num)
            elif "M" in datetime_str:
                _date = _date + relativedelta(months=-num)
            elif "Y" in datetime_str:
                _date = _date + relativedelta(years=-num)
        elif not {"-", ":"} & set(datetime_str):
            _date = datetime.strptime(datetime_str, "%Y%m%d%H%M")
        elif "T" in datetime_str:
            _date = datetime.strptime(datetime_str, DATETIME_STRING_PATTERN)
        else:
            _date = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M")

    except ValueError:
        # Date-only form, e.g. "2021-01-02".
        _date = datetime.strptime(datetime_str, "%Y-%m-%d")

    return _date.replace(tzinfo=timezone.utc)
+
+
def convert_datetime_str_to_epoch(datetime_str: str) -> Optional[int]:
    """Parse a datetime string and return its Unix epoch seconds, or None."""
    parsed = dateparser.parse(datetime_str)
    if parsed is None:
        return None
    # NOTE(review): time.mktime interprets the struct_time in the *local*
    # timezone — confirm that is intended for naive parse results.
    return math.trunc(time.mktime(parsed.timetuple()))
+
+
def tag_visible(element: Any) -> bool:
    """Return False for text nodes inside non-visible tags or HTML comments."""
    invisible_parents = (
        "style",
        "script",
        "head",
        "title",
        "meta",
        "[document]",
    )
    if element.parent.name in invisible_parents:
        return False
    return not isinstance(element, Comment)
+
+
def text_from_html(body: Union[str, bytes]) -> str:
    """Extract the visible text of an HTML document as a single string."""
    soup = BeautifulSoup(body, "html.parser")
    # Fix: find_all(string=True) is the non-deprecated spelling of
    # findAll(text=True) in Beautiful Soup 4.
    texts = soup.find_all(string=True)
    visible_texts = filter(tag_visible, texts)
    return " ".join(t.strip() for t in visible_texts)
+
+
def dict_to_object(
    dictionary: Dict[str, Any],
    class_name_key: Optional[str] = "_target_",
    full_class_name: Optional[str] = None,
) -> Any:
    """Recursively convert a dict into an object.

    When ``class_name_key`` is present in the dict (or ``full_class_name`` is
    given), the referenced class is imported and instantiated with the
    remaining keys as kwargs; otherwise a plain dict is returned.
    """
    kwargs: Dict[str, Any] = {}
    for key, value in dictionary.items():
        if key == class_name_key:
            full_class_name = value
        elif isinstance(value, dict):
            kwargs[key] = dict_to_object(
                dictionary=value, class_name_key=class_name_key
            )
        else:
            kwargs[key] = value

    if full_class_name is None:
        return kwargs

    module_name, class_name = full_class_name.rsplit(".", 1)
    target_class = getattr(import_module(module_name), class_name)
    return target_class(**kwargs)
+
+
def datetime_handler(x: Any) -> Optional[Any]:
    """``json.dumps`` default hook: ISO-format datetimes, ``vars()`` for objects."""
    if x is None:
        return None
    if isinstance(x, datetime):
        return x.isoformat()
    return vars(x) if hasattr(x, "__dict__") else x
diff --git a/obsei_module/obsei-master/obsei/misc/web_search.py b/obsei_module/obsei-master/obsei/misc/web_search.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b8f92c8c76226b05102c14cc992d1b323e39eaa
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/misc/web_search.py
@@ -0,0 +1,35 @@
+from typing import Any, Dict, List, Optional
+
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3 import Retry
+
+GOOGLE_SEARCH_URL = "https://www.google.com/search"
+
+
+# Code is influenced from https://github.com/cowboy-bebug/app-store-scraper
def perform_search(
    request_url: str,
    query: str,
    search_url: str = GOOGLE_SEARCH_URL,
    search_country: Optional[str] = None,
    headers: Optional[Dict[str, Any]] = None,
    total: int = 3,
    backoff_factor: int = 3,
    status_force_list: Optional[List[int]] = None,
) -> requests.Response:
    """Issue a GET request to ``search_url`` for ``query`` with retry support.

    Args:
        request_url: URL prefix the retry adapter is mounted on.
        query: Search query, sent as the ``q`` parameter.
        search_url: Search endpoint (defaults to Google search).
        search_country: Optional country restriction (``cr`` parameter).
        headers: Optional HTTP headers.
        total: Maximum number of retries.
        backoff_factor: urllib3 exponential backoff factor.
        status_force_list: Status codes that trigger a retry; defaults to
            [404, 429].

    Returns:
        The raw ``requests.Response``.
    """

    params = {"q": query}
    if search_country:
        params["cr"] = search_country

    if not status_force_list:
        status_force_list = [404, 429]
    retries = Retry(
        total=total,
        backoff_factor=backoff_factor,
        status_forcelist=status_force_list,
    )
    with requests.Session() as s:
        # NOTE(review): the retry adapter is mounted for the `request_url`
        # prefix, but the GET goes to `search_url` — retries only apply when
        # search_url starts with request_url. Confirm callers pass a matching
        # prefix.
        s.mount(request_url, HTTPAdapter(max_retries=retries))
        return s.get(search_url, headers=headers, params=params)
diff --git a/obsei_module/obsei-master/obsei/misc/youtube_reviews_scrapper.py b/obsei_module/obsei-master/obsei/misc/youtube_reviews_scrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..090c2e3d529b858856155d508eee5bb9f2ac3158
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/misc/youtube_reviews_scrapper.py
@@ -0,0 +1,169 @@
+# Code in this file is copied from https://github.com/egbertbouman/youtube-comment-downloader/blob/master/youtube_comment_downloader/downloader.py
+# and modified to fit the needs of this project. When code from youtube-comment-downloader was copied it was MIT licensed.
+# Code Commit: https://github.com/egbertbouman/youtube-comment-downloader/commit/9a15b8e3fbaebad660875409fb1bbe74db17f304
+
+import json
+import logging
+import time
+import re
+from datetime import datetime, timezone
+
+import dateparser
+from typing import Optional, Any, List, Dict, Generator
+
+import requests
+from pydantic import BaseModel
+from requests import Session
+
+logger = logging.getLogger(__name__)
+
+
+class YouTubeCommentExtractor(BaseModel):
+ _YT_URL: str = 'https://www.youtube.com'
+ _YT_CFG_REGEX: str = r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;'
+ _YT_INITIAL_DATA_REGEX: str = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;\s*(?:var\s+meta| str:
+ match = re.search(pattern, text)
+ return match.group(group) if match else ''
+
    def _ajax_request(self, session: Session, endpoint: Dict[str, Any], ytcfg: Dict[str, Any]) -> Any:
        """POST a continuation request to YouTube's innertube API.

        Returns the decoded JSON on HTTP 200, an empty dict on 403/413
        (blocked or payload too large), and implicitly ``None`` when all
        ``request_retries`` attempts end with other status codes.
        """
        url = self._YT_URL + endpoint['commandMetadata']['webCommandMetadata']['apiUrl']

        data = {'context': ytcfg['INNERTUBE_CONTEXT'],
                'continuation': endpoint['continuationCommand']['token']}

        for _ in range(self.request_retries):
            response = session.post(url, params={'key': ytcfg['INNERTUBE_API_KEY']}, json=data)
            if response.status_code == 200:
                return response.json()
            if response.status_code in [403, 413]:
                return {}
            else:
                # Transient failure (e.g. 5xx): back off before retrying.
                time.sleep(self.sleep_time)
+
+ @staticmethod
+ def _search_dict(partial: Any, search_key: str) -> Generator[Any, Any, None]:
+ stack = [partial]
+ while stack:
+ current_item = stack.pop()
+ if isinstance(current_item, dict):
+ for key, value in current_item.items():
+ if key == search_key:
+ yield value
+ else:
+ stack.append(value)
+ elif isinstance(current_item, list):
+ for value in current_item:
+ stack.append(value)
+
    def _fetch_comments(self, until_datetime: Optional[datetime] = None) -> Generator[Any, Any, None]:
        """Stream comment dicts for ``self.video_url``.

        Scrapes the video page for innertube config/initial data, then walks
        continuation tokens until exhausted. Stops early when a comment older
        than ``until_datetime`` is reached, when YouTube returns an error, or
        when the configuration cannot be extracted.
        """
        session = requests.Session()
        session.headers['User-Agent'] = self.user_agent
        response = session.get(self.video_url)

        # Consent redirect (EU): set the consent cookie and retry once.
        if response.request and response.request.url and 'uxe=' in response.request.url:
            session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')  # type: ignore[no-untyped-call]
            response = session.get(self.video_url)

        html = response.text
        ytcfg = json.loads(self._regex_search(html, self._YT_CFG_REGEX))
        if not ytcfg:
            return  # Unable to extract configuration
        if self.lang_code:
            ytcfg['INNERTUBE_CONTEXT']['client']['hl'] = self.lang_code

        data = json.loads(self._regex_search(html, self._YT_INITIAL_DATA_REGEX))

        section = next(self._search_dict(data, 'itemSectionRenderer'), None)
        renderer = next(self._search_dict(section, 'continuationItemRenderer'), None) if section else None
        if not renderer:
            # Comments disabled?
            return

        needs_sorting = self.sort_by != 0
        continuations = [renderer['continuationEndpoint']]
        while continuations:
            continuation = continuations.pop()
            response = self._ajax_request(session, continuation, ytcfg)

            if not response:
                break
            if list(self._search_dict(response, 'externalErrorMessage')):
                logger.warning('Error returned from server: %s', next(self._search_dict(response, 'externalErrorMessage')))
                return

            if needs_sorting:
                # Switch to the requested sort order via the sort sub-menu,
                # then restart from that continuation.
                sub_menu: Dict[str, Any] = next(self._search_dict(response, 'sortFilterSubMenuRenderer'), {})
                sort_menu = sub_menu.get('subMenuItems', [])
                if self.sort_by < len(sort_menu):
                    continuations = [sort_menu[self.sort_by]['serviceEndpoint']]
                    needs_sorting = False
                    continue
                # TODO: Fix it. Causing observer to fail silently
                logger.warning("Unable to set sorting")
                # raise RuntimeError('Failed to set sorting')

            actions = list(self._search_dict(response, 'reloadContinuationItemsCommand')) + \
                list(self._search_dict(response, 'appendContinuationItemsAction'))

            for action in actions:
                for item in action.get('continuationItems', []):
                    if action['targetId'] == 'comments-section':
                        # Process continuations for comments and replies.
                        continuations[:0] = [ep for ep in self._search_dict(item, 'continuationEndpoint')]
                    if self.fetch_replies:
                        # TODO: Fix it. This functionality is broken
                        if action['targetId'].startswith('comment-replies-item') and 'continuationItemRenderer' in item:
                            # Process the 'Show more replies' button
                            continuations.append(next(self._search_dict(item, 'buttonRenderer'))['command'])

            for comment in reversed(list(self._search_dict(response, 'commentRenderer'))):
                # Reply comment ids contain a "." separator; skip them when
                # replies are not requested.
                if not self.fetch_replies and "." in comment['commentId']:
                    continue

                comment_time_string = comment['publishedTimeText']['runs'][0]['text']
                comment_time_string = comment_time_string or ''
                comment_time = dateparser.parse(
                    comment_time_string.split('(edited)', 1)[0].strip(),
                )

                if comment_time:
                    comment_time = comment_time.replace(tzinfo=timezone.utc)
                    if until_datetime and until_datetime > comment_time:
                        return

                yield {'comment_id': comment['commentId'],
                       'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
                       'time': comment_time,
                       'author': comment.get('authorText', {}).get('simpleText', ''),
                       'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
                       'votes': comment.get('voteCount', {}).get('simpleText', '0'),
                       'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
                       'heart': next(self._search_dict(comment, 'isHearted'), False)}

            time.sleep(self.sleep_time)
+
+ def fetch_comments(self, until_datetime: Optional[datetime] = None) -> List[Dict[str, Any]]:
+ comments: List[Dict[str, Any]] = []
+ for comment in self._fetch_comments(until_datetime=until_datetime):
+ comments.append(comment)
+ if self.max_comments and self.max_comments == len(comments):
+ break
+
+ return comments
diff --git a/obsei_module/obsei-master/obsei/payload.py b/obsei_module/obsei-master/obsei/payload.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9673382658962c4a0427f66480ff58674e1219c
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/payload.py
@@ -0,0 +1,33 @@
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+
class BasePayload(BaseModel):
    """Common envelope for data flowing between sources, analyzers, and sinks."""

    # default_factory makes the per-instance fresh-dict intent explicit;
    # pydantic copies literal mutable defaults too, so behavior is unchanged.
    segmented_data: Dict[str, Any] = Field(default_factory=dict)
    meta: Dict[str, Any] = Field(default_factory=dict)
    source_name: Optional[str] = "Undefined"

    class Config:
        arbitrary_types_allowed = True
+
+
class TextPayload(BasePayload):
    """Payload carrying a piece of processed text through the pipeline."""

    processed_text: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of this payload."""
        return {
            "processed_text": self.processed_text,
            "segmented_data": self.segmented_data,
            "meta": self.meta,
            "source_name": self.source_name,
        }

    def is_contains_classification_payload(self) -> bool:
        """True when a classifier has attached data to this payload."""
        return bool(self.segmented_data) and "classifier_data" in self.segmented_data

    class Config:
        arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/postprocessor/__init__.py b/obsei_module/obsei-master/obsei/postprocessor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/postprocessor/base_postprocessor.py b/obsei_module/obsei-master/obsei/postprocessor/base_postprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..105b67f8fb3e816ba65b4434ed7f4e27211f8b56
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/postprocessor/base_postprocessor.py
@@ -0,0 +1,26 @@
+from typing import List, Any
+
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from abc import abstractmethod
+
+
class BasePostprocessorConfig(BaseSettings):
    """Base configuration shared by all postprocessors."""

    # Discriminator used when configs are built from serialized workflows.
    TYPE: str = "Base"

    class Config:
        multi_label = True
+
+
class BasePostprocessor(BaseSettings):
    """Abstract base for postprocessors that transform analyzer output payloads."""

    TYPE: str = "Base"

    # NOTE(review): BaseSettings does not use ABCMeta, so @abstractmethod is
    # not enforced at instantiation time — subclasses override by convention.
    @abstractmethod
    def postprocess_input(
        self, input_list: List[TextPayload], config: BasePostprocessorConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Transform ``input_list`` per ``config`` and return new payloads."""
        pass

    class Config:
        arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator.py b/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd1ee12a8b3c06bfff5e28088034fdc941ac7159
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator.py
@@ -0,0 +1,55 @@
+from typing import List, Optional, Dict, Any
+
+from obsei.payload import TextPayload
+from obsei.postprocessor.base_postprocessor import (
+ BasePostprocessorConfig,
+ BasePostprocessor
+)
+from obsei.postprocessor.inference_aggregator_function import BaseInferenceAggregateFunction
+from obsei.preprocessor.text_splitter import TextSplitterPayload
+
+
class InferenceAggregatorConfig(BasePostprocessorConfig):
    """Config selecting the strategy used to merge chunk-level payloads."""

    # Strategy that merges a document's chunk payloads into one payload.
    aggregate_function: BaseInferenceAggregateFunction
+
+
class InferenceAggregator(BasePostprocessor):
    """Postprocessor that merges chunk-level analyzer output per document."""

    def postprocess_input(  # type: ignore[override]
        self, input_list: List[TextPayload], config: InferenceAggregatorConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Group payloads by document and aggregate each group."""
        grouped = self.segregate_payload(input_list)

        aggregated: List[TextPayload] = []
        for payloads in grouped.values():
            aggregated.extend(config.aggregate_function.execute(payloads))

        return aggregated

    @staticmethod
    def segregate_payload(
        input_list: List[TextPayload],
    ) -> Dict[str, List[TextPayload]]:
        """Bucket payloads by originating document, ordered by chunk id."""
        buckets: Dict[str, List[TextPayload]] = {}

        # Chunks produced by the splitter share a document_id; payloads
        # without splitter metadata fall back to their list index as key.
        for position, payload in enumerate(input_list):
            splitter_data: Optional[TextSplitterPayload] = (
                payload.meta.get("splitter", None) if payload.meta else None
            )
            key = splitter_data.document_id if splitter_data else str(position)
            buckets.setdefault(key, []).append(payload)

        # Restore original chunk order inside each document bucket.
        for chunk_list in buckets.values():
            if (
                chunk_list
                and chunk_list[0].meta
                and chunk_list[0].meta.get("splitter", None)
            ):
                chunk_list.sort(key=lambda p: p.meta["splitter"].chunk_id)  # type: ignore[no-any-return]

        return buckets
diff --git a/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator_function.py b/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator_function.py
new file mode 100644
index 0000000000000000000000000000000000000000..d09164b6161c667ff463ff8c1754ed49ef49604b
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/postprocessor/inference_aggregator_function.py
@@ -0,0 +1,127 @@
+import logging
+from abc import abstractmethod
+from typing import Any, Dict, List, Tuple
+
+from pydantic import BaseModel
+
+from obsei.payload import TextPayload
+
+logger = logging.getLogger(__name__)
+
+
class BaseInferenceAggregateFunction(BaseModel):
    """Base class for functions that merge chunk-level payloads into one."""

    @abstractmethod
    def execute(
        self, input_list: List[TextPayload], **kwargs: Any
    ) -> List[TextPayload]:
        """Aggregate a document's chunk payloads; subclasses must override."""
        pass

    @staticmethod
    def _extract_merged_parameters(
        input_list: List[TextPayload],
    ) -> Tuple[List[str], int, Dict[str, Any]]:
        """Merge chunk metadata and collect chunk texts.

        Returns:
            Tuple of (chunk texts, combined character length, merged meta).
        """
        document_length: int = 0
        meta: Dict[str, Any] = {}
        doc_text: List[str] = []
        # Merge meta across payload and collect score keys
        for payload in input_list:
            document_length += len(payload.processed_text)
            meta = {**meta, **payload.meta} if payload.meta else meta
            # Remove per-chunk splitter key from the merged meta. A default is
            # required: payloads that were never split (segregate_payload keys
            # them by list index) have no "splitter" key, and the bare
            # pop("splitter") raised KeyError for them.
            meta.pop("splitter", None)
            doc_text.append(payload.processed_text)
        return doc_text, document_length, meta
+
+
class ClassificationAverageScore(BaseInferenceAggregateFunction):
    """Aggregates classifier scores as a chunk-length-weighted average."""

    name: str = "ClassificationAverageScore"
    default_value: float = 0.0

    def execute(
        self, input_list: List[TextPayload], **kwargs: Any
    ) -> List[TextPayload]:
        """Collapse chunk payloads into one payload with averaged scores."""
        if not input_list:
            logger.warning("Can't aggregate empty list")
            return input_list

        if not input_list[0].is_contains_classification_payload():
            logger.warning(
                "ClassificationAverage supports Classification and Sentiment Analyzers only"
            )
            return input_list

        default_value = kwargs.get("default_value", self.default_value)
        source_name = input_list[0].source_name
        doc_text, document_length, meta = self._extract_merged_parameters(input_list)

        # Weighted average: each chunk contributes proportionally to its
        # share of the total document length.
        scores: Dict[str, float] = {}
        for payload in input_list:
            if not payload.segmented_data:
                continue
            weight = len(payload.processed_text) / document_length
            classifier_data = payload.segmented_data.get("classifier_data", {})
            for label, score in classifier_data.items():
                scores[label] = scores.get(label, default_value) + score * weight

        merged_payload = TextPayload(
            processed_text=" ".join(doc_text),
            meta=meta,
            segmented_data={
                "aggregator_data": {
                    "avg_score": scores,
                    "aggregator_name": self.name,
                }
            },
            source_name=source_name,
        )
        return [merged_payload]
+
+
class ClassificationMaxCategories(BaseInferenceAggregateFunction):
    """Counts categories crossing a score threshold and tracks max scores."""

    name: str = "ClassificationMaxCategories"
    # Minimum classifier score for a category to be counted.
    score_threshold: float = 0.5

    def execute(
        self, input_list: List[TextPayload], **kwargs: Any
    ) -> List[TextPayload]:
        """Collapse chunk payloads into one payload with per-category stats.

        For every category whose chunk score exceeds the threshold, records
        how many chunks crossed it and the maximum score observed.
        """
        if len(input_list) == 0:
            logger.warning("Can't aggregate empty list")
            return input_list

        if not input_list[0].is_contains_classification_payload():
            # Fixed copy-pasted message that previously named ClassificationAverage.
            logger.warning(
                "ClassificationMaxCategories supports Classification and Sentiment Analyzers only"
            )
            return input_list

        score_threshold = kwargs.get("score_threshold", self.score_threshold)

        source_name = input_list[0].source_name

        doc_text, _, meta = self._extract_merged_parameters(input_list)

        max_scores: Dict[str, float] = {}
        category_count: Dict[str, int] = {}
        for payload in input_list:
            if payload.segmented_data:
                for key, value in payload.segmented_data.get("classifier_data", {}).items():
                    if value > score_threshold:
                        category_count[key] = category_count.get(key, 0) + 1
                        max_scores[key] = max(max_scores.get(key, 0.0), value)

        return [
            TextPayload(
                processed_text=" ".join(doc_text),
                meta=meta,
                segmented_data={
                    "aggregator_data": {
                        "category_count": category_count,
                        "max_scores": max_scores,
                        "aggregator_name": self.name,
                    }
                },
                source_name=source_name,
            )
        ]
diff --git a/obsei_module/obsei-master/obsei/preprocessor/__init__.py b/obsei_module/obsei-master/obsei/preprocessor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/preprocessor/base_preprocessor.py b/obsei_module/obsei-master/obsei/preprocessor/base_preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..5af9321e5c8466f5f32a14a84b9114083796919e
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/preprocessor/base_preprocessor.py
@@ -0,0 +1,26 @@
+from abc import abstractmethod
+from typing import List, Any
+
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+
+
class BaseTextProcessorConfig(BaseSettings):
    """Base configuration for text preprocessors."""

    # Discriminator used when (de)serializing configs.
    TYPE: str = "Base"

    class Config:
        arbitrary_types_allowed = True
+
+
class BaseTextPreprocessor(BaseSettings):
    """Base class for preprocessors that transform text payloads."""

    # Discriminator used when (de)serializing.
    TYPE: str = "Base"

    @abstractmethod
    def preprocess_input(
        self, input_list: List[TextPayload], config: BaseTextProcessorConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Transform the payload list; subclasses must override."""
        pass

    class Config:
        arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/preprocessor/text_cleaner.py b/obsei_module/obsei-master/obsei/preprocessor/text_cleaner.py
new file mode 100644
index 0000000000000000000000000000000000000000..43a95662af3f833e0ec9b82254a095ef36ecf7e6
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/preprocessor/text_cleaner.py
@@ -0,0 +1,76 @@
+import traceback
+import logging
+from typing import List, Any, Optional, Tuple
+
+from obsei.payload import TextPayload
+from obsei.preprocessor.base_preprocessor import (
+ BaseTextPreprocessor,
+ BaseTextProcessorConfig,
+)
+from obsei.preprocessor.text_cleaning_function import TextCleaningFunction, ToLowerCase, RemoveWhiteSpaceAndEmptyToken, \
+ RemovePunctuation, RemoveSpecialChars, DecodeUnicode, RemoveDateTime, ReplaceDomainKeywords, TokenStemming, \
+ RemoveStopWords
+from obsei.preprocessor.text_tokenizer import BaseTextTokenizer, NLTKTextTokenizer
+
+cleaner_logger: logging.Logger = logging.getLogger(__name__)
+
+
class TextCleanerConfig(BaseTextProcessorConfig):
    """Configuration for TextCleaner; installs a default cleaning chain."""

    cleaning_functions: Optional[List[TextCleaningFunction]] = None
    stop_words_language: Optional[str] = "english"
    stop_words: Optional[List[str]] = None
    domain_keywords: Optional[Tuple[str, str]] = None
    disable_tokenization: bool = False

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Leave caller-supplied functions untouched; otherwise fall back
        # to the standard cleaning pipeline, in order.
        if self.cleaning_functions:
            return

        default_chain: List[TextCleaningFunction] = [
            ToLowerCase(),
            RemoveWhiteSpaceAndEmptyToken(),
            RemovePunctuation(),
            RemoveSpecialChars(),
            DecodeUnicode(),
            RemoveDateTime(),
            ReplaceDomainKeywords(domain_keywords=self.domain_keywords),
            TokenStemming(),
            RemoveStopWords(
                language=self.stop_words_language, stop_words=self.stop_words
            ),
            RemoveWhiteSpaceAndEmptyToken(),
        ]
        self.cleaning_functions = default_chain
+
+
class TextCleaner(BaseTextPreprocessor):
    """Preprocessor that runs configured cleaning functions on payload text."""

    text_tokenizer: Optional[BaseTextTokenizer] = None

    def __init__(self, **data: Any):
        super().__init__(**data)
        # Fall back to the NLTK tokenizer when none was injected.
        self.text_tokenizer = self.text_tokenizer or NLTKTextTokenizer()

    def preprocess_input(  # type: ignore[override]
        self,
        input_list: List[TextPayload],
        config: TextCleanerConfig,
        **kwargs: Any,
    ) -> List[TextPayload]:
        """Clean every payload's text in place and return the same list."""
        if config.cleaning_functions is None:
            return input_list

        for payload in input_list:
            # Either treat the whole text as a single token, or tokenize.
            if config.disable_tokenization or self.text_tokenizer is None:
                tokens = [payload.processed_text]
            else:
                tokens = self.text_tokenizer.tokenize_text(payload.processed_text)

            # Run the cleaning chain; a failing function is logged and skipped.
            for func in config.cleaning_functions:
                try:
                    tokens = func.execute(tokens)
                except Exception as ex:
                    cleaner_logger.warning(f"Received exception: {ex}")
                    traceback.print_exc()

            payload.processed_text = " ".join(tokens)

        return input_list
diff --git a/obsei_module/obsei-master/obsei/preprocessor/text_cleaning_function.py b/obsei_module/obsei-master/obsei/preprocessor/text_cleaning_function.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ec4b98986fbfd3839d86ff486e5928705ba92ce
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/preprocessor/text_cleaning_function.py
@@ -0,0 +1,176 @@
+import logging
+import re
+import string
+from abc import abstractmethod
+from typing import Any, List, Optional, Tuple
+from unicodedata import normalize
+
+import nltk
+import spacy
+from dateutil.parser import parse
+from nltk.corpus import stopwords
+from pydantic import BaseModel, PrivateAttr, Field
+from spacy import Language # type: ignore
+from spacy.cli import download # type: ignore
+
+cleaner_func_logger: logging.Logger = logging.getLogger(__name__)
+
+
class TextCleaningFunction(BaseModel):
    """Base class for a single token-level text cleaning step."""

    @abstractmethod
    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        """Transform the token list; subclasses must override."""
        pass
+
+
class ToLowerCase(TextCleaningFunction):
    """Lower-cases every token."""

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        return list(map(str.lower, tokens))
+
+
class RemoveWhiteSpaceAndEmptyToken(TextCleaningFunction):
    """Strips surrounding whitespace and drops tokens that become empty."""

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        stripped = (token.strip() for token in tokens)
        return [token for token in stripped if token != ""]
+
+
# Removes words that don't add any meaning to the sequence
class RemoveStopWords(TextCleaningFunction):
    """Filters out stop words from the token list."""

    # Explicit stop-word list; when absent, NLTK's list for `language` is used.
    stop_words: Optional[List[str]] = None
    language: Optional[str] = "english"

    def __init__(self, **data: Any):
        super().__init__(**data)
        if not self.stop_words:
            try:
                # NLTK resources are looked up by "<category>/<name>"; the
                # previous bare "stopwords" lookup never matched, so a
                # download was triggered on every instantiation.
                nltk.data.find("corpora/stopwords")
            except LookupError:
                nltk.download("stopwords")
            self.stop_words = stopwords.words(self.language)

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        """Return tokens with configured stop words removed."""
        if not self.stop_words:
            return tokens
        return [token for token in tokens if token not in self.stop_words]
+
+
class RemovePunctuation(TextCleaningFunction):
    """Strips punctuation characters and drops tokens left empty."""

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        # Build the translation table once; the original rebuilt it and
        # translated every token twice (once for the filter, once for the
        # result).
        table = str.maketrans("", "", string.punctuation)
        cleaned = (token.translate(table) for token in tokens)
        return [token for token in cleaned if token]
+
+
# Transforms tokens to standardized form
class TokenStemming(TextCleaningFunction):
    """Stems each token via NLTK's PorterStemmer when available."""

    stemmer: Optional[Any] = None

    def __init__(self, **data: Any):
        super().__init__(**data)
        if self.stemmer:
            return
        try:
            from nltk.stem import PorterStemmer

            self.stemmer = PorterStemmer()
        except ImportError:
            cleaner_func_logger.warning(
                "NLTK module is not installed hence token stemming will not work"
            )

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        """Stem every token; no-op when no stemmer could be created."""
        stemmer = self.stemmer
        if not stemmer:
            return tokens
        return [stemmer.stem(token) for token in tokens]
+
+
class RemoveSpecialChars(TextCleaningFunction):
    """
    Removes special characters by eliminating all characters from each token
    and only retains alphabetic, numeric or alphanumeric tokens by stripping
    special characters from them
    """

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        pattern = re.compile("[^A-Za-z0-9]+")
        stripped = (pattern.sub("", token) for token in tokens)
        return [token for token in stripped if token != ""]
+
+
# Converts unicodes to ASCII characters
class DecodeUnicode(TextCleaningFunction):
    """Replaces each token with its closest ASCII representation."""

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        decoded: List[str] = []
        for token in tokens:
            # NFKD-decompose, then drop anything that can't encode as ASCII.
            ascii_bytes = normalize("NFKD", token).encode("ascii", "ignore")
            decoded.append(ascii_bytes.decode("utf-8"))
        return decoded
+
+
class RemoveDateTime(TextCleaningFunction):
    """Removes date/time expressions detected by dateutil's fuzzy parser."""

    # Reused to drop whitespace-only tokens left behind after removal.
    _white_space_cleaner = RemoveWhiteSpaceAndEmptyToken()

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        """Strip date/time fragments from the joined token text.

        When no parseable date/time is found, dateutil raises ValueError and
        the original tokens are returned (whitespace-cleaned).
        """
        text: str = " ".join(tokens)
        try:
            fuzzy_tokens: Tuple[str]
            # fuzzy_with_tokens returns the non-date remainder fragments.
            _, fuzzy_tokens = parse(text, fuzzy_with_tokens=True)  # type: ignore
            tokens = " ".join(fuzzy_tokens).split()
        except ValueError:
            cleaner_func_logger.warning("Token contain invalid date time format")
        return self._white_space_cleaner.execute(tokens)
+
+
# Replaces domain specific keywords
class ReplaceDomainKeywords(TextCleaningFunction):
    """Substitutes configured (source, target) keyword pairs in the text."""

    domain_keywords: Optional[List[Tuple[str, str]]] = None

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        # Nothing to do when no keyword mapping was configured.
        if not self.domain_keywords or len(self.domain_keywords) == 0:
            return tokens

        text = " ".join(tokens)
        for source_keyword, target_keyword in self.domain_keywords:
            # NOTE(review): the lower-cased variant is only *checked*;
            # replacement targets the original casing (behavior kept as-is).
            if source_keyword in text or source_keyword.lower() in text:
                text = text.replace(source_keyword, target_keyword)
        return text.split()
+
+
class RegExSubstitute(TextCleaningFunction):
    """Applies a regular-expression substitution to every token."""

    pattern: Optional[str] = None
    substitute: Optional[str] = None

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        # Both a pattern and a replacement are required to do anything.
        if not self.pattern or not self.substitute:
            return tokens

        regex = re.compile(self.pattern)
        return [regex.sub(self.substitute, token) for token in tokens]
+
+
class SpacyLemmatization(TextCleaningFunction):
    """Lemmatizes tokens with a spaCy pipeline (parser/NER disabled)."""

    _nlp: Language = PrivateAttr()
    model_name_or_path: str = Field("en_core_web_sm")
    batch_size: int = 4
    n_process: int = 1

    def __init__(self, **data: Any):
        super().__init__(**data)
        try:
            self._nlp = spacy.load(
                self.model_name_or_path,
                disable=["parser", "ner"],
            )
        except OSError:
            # spacy.load raises OSError when the model isn't installed; the
            # previous bare `except:` also swallowed KeyboardInterrupt and
            # SystemExit.
            download(self.model_name_or_path)
            self._nlp = spacy.load(
                self.model_name_or_path,
                disable=["parser", "ner"],
            )

    def execute(self, tokens: List[str], **kwargs: Any) -> List[str]:
        """Return each token replaced by the lemmas of its words."""
        processed_tokens: List[str] = []
        for doc in self._nlp.pipe(
            texts=tokens, batch_size=self.batch_size, n_process=self.n_process
        ):
            processed_tokens.append(" ".join(token.lemma_ for token in doc))
        return processed_tokens
diff --git a/obsei_module/obsei-master/obsei/preprocessor/text_splitter.py b/obsei_module/obsei-master/obsei/preprocessor/text_splitter.py
new file mode 100644
index 0000000000000000000000000000000000000000..9124d367567bcbbef8149965afc2a750c16664f2
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/preprocessor/text_splitter.py
@@ -0,0 +1,129 @@
+import logging
+from typing import List, Optional, Any
+import uuid
+
+import nltk
+from nltk import sent_tokenize
+from pydantic import BaseModel
+
+from obsei.payload import TextPayload
+from obsei.preprocessor.base_preprocessor import (
+ BaseTextPreprocessor,
+ BaseTextProcessorConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
class TextSplitterPayload(BaseModel):
    """Metadata describing one chunk produced by TextSplitter."""

    # The chunk text itself.
    phrase: str
    # 0-based position of the chunk within its document.
    chunk_id: int
    # Character length of `phrase`.
    chunk_length: int
    # Identifier shared by all chunks of the same source document.
    document_id: str
    # Filled in once all chunks of the document are known.
    total_chunks: Optional[int] = None
+
+
class TextSplitterConfig(BaseTextProcessorConfig):
    """Configuration for TextSplitter chunking behavior."""

    # Maximum characters per chunk (split point snaps back to whitespace).
    max_split_length: int = 512
    split_stride: int = 0  # overlap length
    document_id_key: Optional[str] = None  # document_id in meta
    enable_sentence_split: bool = False
    honor_paragraph_boundary: bool = False
    paragraph_marker: str = '\n\n'
    sentence_tokenizer: str = 'tokenizers/punkt/PY3/english.pickle'

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Sentence splitting needs the NLTK punkt models available.
        if self.enable_sentence_split:
            nltk.download('punkt')
+
+
class TextSplitter(BaseTextPreprocessor):
    """Splits payload text into length-bounded, optionally overlapping chunks."""

    def preprocess_input(  # type: ignore[override]
        self, input_list: List[TextPayload], config: TextSplitterConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Split each payload into chunk payloads carrying splitter metadata.

        Returns one TextPayload per chunk; each chunk's TextSplitterPayload
        is stored under the "splitter" key of the payload meta.
        """
        text_splits: List[TextPayload] = []

        for idx, input_data in enumerate(input_list):
            # Prefer a caller-provided document id from meta, else generate one.
            if (
                config.document_id_key
                and input_data.meta
                and config.document_id_key in input_data.meta
            ):
                document_id = str(input_data.meta.get(config.document_id_key))
            else:
                document_id = uuid.uuid4().hex

            if config.honor_paragraph_boundary:
                paragraphs = input_data.processed_text.split(config.paragraph_marker)
            else:
                paragraphs = [input_data.processed_text]

            # Units whose boundaries are respected while chunking.
            atomic_texts: List[str] = []
            for paragraph in paragraphs:
                if config.enable_sentence_split:
                    atomic_texts.extend(sent_tokenize(paragraph))
                else:
                    atomic_texts.append(paragraph)

            split_id = 0
            document_splits: List[TextSplitterPayload] = []
            for text in atomic_texts:
                text_length = len(text)
                if text_length == 0:
                    continue

                start_idx = 0
                while start_idx < text_length:
                    # Back up by the stride so consecutive chunks overlap,
                    # snapping to a whitespace boundary.
                    if config.split_stride > 0 and start_idx > 0:
                        start_idx = (
                            self._valid_index(
                                text, start_idx - config.split_stride
                            )
                            + 1
                        )
                    # End at most max_split_length later, snapped back to
                    # whitespace so words are not cut in half.
                    end_idx = self._valid_index(
                        text,
                        min(start_idx + config.max_split_length, text_length),
                    )

                    phrase = text[start_idx:end_idx]
                    document_splits.append(
                        TextSplitterPayload(
                            phrase=phrase,
                            chunk_id=split_id,
                            chunk_length=len(phrase),
                            document_id=document_id,
                        )
                    )
                    start_idx = end_idx + 1
                    split_id += 1

            # Stamp the final chunk count on every chunk, then emit one
            # payload per chunk with splitter info attached to the meta.
            total_splits = len(document_splits)
            for split in document_splits:
                split.total_chunks = total_splits
                payload = TextPayload(
                    processed_text=split.phrase,
                    source_name=input_data.source_name,
                    segmented_data=input_data.segmented_data,
                    meta={**input_data.meta, **{"splitter": split}}
                    if input_data.meta
                    else {"splitter": split},
                )
                text_splits.append(payload)

        return text_splits

    @staticmethod
    def _valid_index(document: str, idx: int) -> int:
        """Clamp idx into [0, len] and move it back to a whitespace boundary."""
        if idx <= 0:
            return 0
        if idx >= len(document):
            return len(document)
        new_idx = idx
        while new_idx > 0:
            if document[new_idx] in [" ", "\n", "\t"]:
                break
            new_idx -= 1
        return new_idx
diff --git a/obsei_module/obsei-master/obsei/preprocessor/text_tokenizer.py b/obsei_module/obsei-master/obsei/preprocessor/text_tokenizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5632c43d2ba0f9f01a69de333ea764b6653b11d6
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/preprocessor/text_tokenizer.py
@@ -0,0 +1,29 @@
+import logging
+from abc import abstractmethod
+from typing import Any, List, Optional
+
+import nltk
+from nltk import word_tokenize
+from pydantic import BaseModel
+
+logger = logging.getLogger(__name__)
+
+
class BaseTextTokenizer(BaseModel):
    """Base class for tokenizers that split text into word tokens."""

    @abstractmethod
    def tokenize_text(self, text: str) -> List[str]:
        """Split text into tokens; subclasses must override."""
        pass
+
+
class NLTKTextTokenizer(BaseTextTokenizer):
    """Word tokenizer backed by NLTK."""

    # Name of the NLTK tokenizer resource to ensure is downloaded.
    tokenizer_name: Optional[str] = "punkt"

    def __init__(self, **data: Any):
        super().__init__(**data)
        try:
            # Only download the tokenizer data when not already present.
            nltk.data.find(f"tokenizers/{self.tokenizer_name}")
        except LookupError:
            nltk.download(f"{self.tokenizer_name}")

    def tokenize_text(self, text: str) -> Any:
        """Split text into word tokens using nltk.word_tokenize."""
        return word_tokenize(text)
diff --git a/obsei_module/obsei-master/obsei/process_workflow.py b/obsei_module/obsei-master/obsei/process_workflow.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbd74b11ab26d10f1aeb344e0b7f30bfa1b687b1
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/process_workflow.py
@@ -0,0 +1,38 @@
+import logging
+
+from obsei.analyzer.base_analyzer import BaseAnalyzer, BaseAnalyzerConfig
+from obsei.configuration import ObseiConfiguration
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig
+from obsei.source.base_source import BaseSourceConfig, BaseSource
+
+logger = logging.getLogger(__name__)
+
# Extract config via yaml file using `config_path` and `config_filename`
obsei_configuration = ObseiConfiguration()

# Initialize objects using configuration
source_config: BaseSourceConfig = obsei_configuration.initialize_instance("source_config")
source: BaseSource = obsei_configuration.initialize_instance("source")
analyzer: BaseAnalyzer = obsei_configuration.initialize_instance("analyzer")
analyzer_config: BaseAnalyzerConfig = obsei_configuration.initialize_instance("analyzer_config")
sink_config: BaseSinkConfig = obsei_configuration.initialize_instance("sink_config")
sink: BaseSink = obsei_configuration.initialize_instance("sink")

# This will fetch information from configured source ie twitter, app store etc
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    logger.info(f"source_response#'{idx}'='{vars(source_response)}'")

# This will execute analyzer (Sentiment, classification etc) on source data with provided analyzer_config
# Analyzer will write its output to `segmented_data` inside `analyzer_response`
analyzer_response_list = analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=analyzer_config
)
for idx, analyzer_response in enumerate(analyzer_response_list):
    # Fixed copy-pasted label: these are analyzer responses, not source responses.
    logger.info(f"analyzer_response#'{idx}'='{vars(analyzer_response)}'")

# This will send analyzed output to configured sink ie Slack, Zendesk etc
sink_response_list = sink.send_data(analyzer_response_list, sink_config)
for idx, sink_response in enumerate(sink_response_list):
    # Fixed copy-pasted label: these are sink responses.
    logger.info(f"sink_response#'{idx}'='{vars(sink_response)}'")
diff --git a/obsei_module/obsei-master/obsei/processor.py b/obsei_module/obsei-master/obsei/processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..20ccec4b3d85dee3949e83a37c24fe3ed8c7e49e
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/processor.py
@@ -0,0 +1,68 @@
+import logging
+from typing import Optional
+
+from pydantic import BaseModel
+
+from obsei.analyzer.base_analyzer import BaseAnalyzer, BaseAnalyzerConfig
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.workflow.workflow import Workflow
+
+logger = logging.getLogger(__name__)
+
+
class Processor(BaseModel):
    """Ties a source, analyzer and sink together into one pipeline run."""

    analyzer: BaseAnalyzer
    analyzer_config: Optional[BaseAnalyzerConfig] = None
    source: Optional[BaseSource] = None
    source_config: Optional[BaseSourceConfig] = None
    sink: Optional[BaseSink] = None
    sink_config: Optional[BaseSinkConfig] = None

    def process(
        self,
        workflow: Optional[Workflow] = None,
        source: Optional[BaseSource] = None,
        source_config: Optional[BaseSourceConfig] = None,
        sink: Optional[BaseSink] = None,
        sink_config: Optional[BaseSinkConfig] = None,
        analyzer: Optional[BaseAnalyzer] = None,
        analyzer_config: Optional[BaseAnalyzerConfig] = None,
    ) -> None:
        """Fetch from source, analyze, and forward results to the sink.

        Explicit arguments take precedence over instance attributes; when a
        workflow is given, its configs override any passed configs. Returns
        silently when either the source or the sink side is incomplete.
        """
        source = source or self.source
        sink = sink or self.sink
        analyzer = analyzer or self.analyzer

        # Renamed from `id` to avoid shadowing the builtin; the keyword
        # passed to the components is still `id`.
        workflow_id: Optional[str] = None
        if workflow:
            # Workflow configs take priority over explicitly passed configs.
            sink_config = workflow.config.sink_config
            source_config = workflow.config.source_config
            analyzer_config = workflow.config.analyzer_config
            workflow_id = workflow.id
        else:
            sink_config = sink_config or self.sink_config
            source_config = source_config or self.source_config
            analyzer_config = analyzer_config or self.analyzer_config

        # Nothing to do without a complete source and sink side.
        if source is None or source_config is None:
            return
        if sink is None or sink_config is None:
            return

        source_response_list = source.lookup(config=source_config, id=workflow_id)
        for idx, source_response in enumerate(source_response_list):
            logger.info(f"source_response#'{idx}'='{source_response}'")

        analyzer_response_list = analyzer.analyze_input(
            source_response_list=source_response_list,
            analyzer_config=analyzer_config,
            id=workflow_id,
        )
        for idx, analyzer_response in enumerate(analyzer_response_list):
            # Fixed copy-pasted log label: these are analyzer responses.
            logger.info(f"analyzer_response#'{idx}'='{analyzer_response}'")

        sink_response_list = sink.send_data(
            analyzer_responses=analyzer_response_list, config=sink_config, id=workflow_id
        )
        for idx, sink_response in enumerate(sink_response_list):
            # Fixed copy-pasted log label: these are sink responses.
            logger.info(f"sink_response#'{idx}'='{sink_response}'")
diff --git a/obsei_module/obsei-master/obsei/sink/__init__.py b/obsei_module/obsei-master/obsei/sink/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/sink/base_sink.py b/obsei_module/obsei-master/obsei/sink/base_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0676fb7db4132085ff54bc9c8d71af8c9b58ada
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/base_sink.py
@@ -0,0 +1,54 @@
+from abc import abstractmethod
+from typing import Any, Dict, List, Optional, Type, TypeVar
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from obsei.workflow.base_store import BaseStore
+
+
class Convertor(BaseSettings):
    """Converts a TextPayload into a plain dict for a sink to send."""

    def convert(
        self,
        analyzer_response: TextPayload,
        base_payload: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """Merge base_payload (if any) with the serialized payload.

        Payload fields override duplicate keys from base_payload. The
        original re-checked `base_payload is not None` after defaulting it
        to {}, making the else-branch unreachable; the dead branch is gone.
        """
        base_payload = base_payload or dict()
        return {**base_payload, **analyzer_response.to_dict()}

    class Config:
        arbitrary_types_allowed = True
+
+
# Bound TypeVar so `from_dict` can be typed as returning the subclass type.
T = TypeVar('T', bound='BaseSinkConfig')


class BaseSinkConfig(BaseSettings):
    """Base configuration for sinks."""

    # Discriminator used when (de)serializing sink configs.
    TYPE: str = "Base"

    @classmethod
    def from_dict(cls: Type[T], config: Dict[str, Any]) -> T:  # type: ignore[empty-body]
        """Build a config from a dict; subclasses are expected to override."""
        pass

    class Config:
        arbitrary_types_allowed = True
+
+
class BaseSink(BaseSettings):
    """Base class for sinks that deliver analyzer output somewhere."""

    # Converts payloads to dicts before sending; overridable per sink.
    convertor: Convertor = Field(Convertor())
    # Optional workflow state store.
    store: Optional[BaseStore] = None

    @abstractmethod
    def send_data(
        self, analyzer_responses: List[TextPayload], config: BaseSinkConfig, **kwargs: Any
    ) -> Any:
        """Deliver analyzer responses to the sink; subclasses must override."""
        pass

    class Config:
        arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/sink/dailyget_sink.py b/obsei_module/obsei-master/obsei/sink/dailyget_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c501a7a7940b15dcbeeee4d23172f94269ed37d
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/dailyget_sink.py
@@ -0,0 +1,166 @@
+import json
+import logging
+from copy import deepcopy
+from datetime import timezone
+from typing import Any, Dict, List, Optional
+
+import pytz
+import requests
+from dateutil import parser
+
+from obsei.sink.base_sink import Convertor
+from obsei.sink.http_sink import HttpSink, HttpSinkConfig
+from obsei.payload import TextPayload
+from obsei.misc.utils import flatten_dict
+
+logger = logging.getLogger(__name__)
+
+
+TWITTER_URL_PREFIX = "https://twitter.com/"
+IST_TZ = pytz.timezone("Asia/Kolkata")
+
+
class PayloadConvertor(Convertor):
    """Converts Twitter payloads into DailyGet enquiry/message requests."""

    def convert(
        self,
        analyzer_response: TextPayload,
        base_payload: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Build the DailyGet request dict for one analyzer response.

        Non-Twitter payloads fall back to the plain merged-dict conversion.
        Expects `source_information` and `partner_id` in kwargs;
        `use_enquiry_api` switches between the enquiry and message formats.
        """
        request_payload = base_payload or {}
        use_enquiry_api = kwargs.get("use_enquiry_api", False)

        if analyzer_response.source_name != "Twitter":
            return {**request_payload, **analyzer_response.to_dict()}

        source_information = kwargs["source_information"]
        partner_id = kwargs["partner_id"]

        user_url = ""
        positive = 0.0
        negative = 0.0
        text = ""
        tweet_id = None
        created_at_str = None
        classification_list: List[str] = []

        # Scrape relevant fields out of the flattened payload by key substring.
        # NOTE(review): substring matching is heuristic — e.g. any key
        # containing "text" wins the text slot; order of dict items matters.
        flat_dict = flatten_dict(analyzer_response.to_dict())
        for k, v in flat_dict.items():
            if "username" in k:
                user_url = TWITTER_URL_PREFIX + v
            elif "text" in k:
                text = str(v).replace("\n", " ")
            elif "positive" in k:
                positive = float(v)
            elif "negative" in k:
                negative = float(v)
            elif "meta_id" in k:
                tweet_id = v
            elif "created_at" in k:
                created_at_str = v
            elif "segmented_data" in k and len(classification_list) < 2:
                # Keep at most two classification labels (last key segment).
                classification_list.append(k.rsplit("_", 1)[1])

        # Convert the UTC ISO timestamp to IST for display.
        created_at_str_parsed: Optional[str] = None
        if created_at_str:
            created_at = parser.isoparse(created_at_str)
            created_at_str_parsed = (
                created_at.replace(tzinfo=timezone.utc)
                .astimezone(tz=IST_TZ)
                .strftime("%Y-%m-%d %H:%M:%S")
            )

        tweet_url = f"{user_url}/status/{tweet_id}"
        # Sentiment rules
        if negative > 8.0:
            sentiment = "Strong Negative"
        elif 0.3 < negative <= 8.0:
            sentiment = "Negative"
        elif positive >= 0.8:
            sentiment = "Strong Positive"
        elif 0.4 < positive < 0.8:
            sentiment = "Positive"
        else:
            sentiment = "Neutral"

        if use_enquiry_api:
            # Enquiry API: everything goes into one key/value text blob.
            enquiry = {
                "Source": source_information,
                "FeedbackBy": user_url,
                "Sentiment": sentiment,
                "TweetUrl": tweet_url,
                "FormattedText": text,
                "PredictedCategories": ",".join(classification_list),
            }

            if created_at_str_parsed is not None:
                enquiry["ReportedAt"] = created_at_str_parsed

            kv_str_list = [k + ": " + str(v) for k, v in enquiry.items()]
            request_payload["enquiryMessage"] = "\n".join(kv_str_list)
        else:
            # Message API: structured JSON string under "messageDetail".
            message = {
                "message": text,
                "partnerId": partner_id,
                "query": source_information,
                "source": analyzer_response.source_name,
                "url": tweet_url,
                "userProfile": user_url,
                "sentiment": sentiment,
                "predictedCategories": ",".join(classification_list),
                "metadata": str(json.dumps(analyzer_response.segmented_data, ensure_ascii=False)),
                "originatedAt": created_at_str,
            }
            request_payload["messageDetail"] = str(json.dumps(message, ensure_ascii=False))

        return request_payload
+
+
class DailyGetSinkConfig(HttpSinkConfig):
    """Configuration for the DailyGet HTTP sink."""

    TYPE: str = "DailyGet"
    partner_id: str
    consumer_phone_number: str
    source_information: str
    # When True, payloads are formatted for the enquiry API instead of
    # the message API (see PayloadConvertor).
    use_enquiry_api: bool = False
    headers: Dict[str, Any] = {"Content-type": "application/json"}
+
+
class DailyGetSink(HttpSink):
    """HTTP sink that posts converted payloads to the DailyGet service."""

    def __init__(self, convertor: Convertor = PayloadConvertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: DailyGetSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Convert each response, POST it, and return the HTTP responses."""
        headers = config.headers

        # First pass: convert every analyzer response to a request payload.
        request_payloads = []
        for analyzer_response in analyzer_responses:
            base = (
                dict()
                if config.base_payload is None
                else deepcopy(config.base_payload)
            )
            request_payloads.append(
                self.convertor.convert(
                    analyzer_response=analyzer_response,
                    base_payload=base,
                    source_information=config.source_information,
                    use_enquiry_api=config.use_enquiry_api,
                    partner_id=config.partner_id
                )
            )

        # Second pass: POST payloads one by one and collect the responses.
        http_responses = []
        for request_payload in request_payloads:
            http_response = requests.post(
                url=config.url,
                json=request_payload,
                headers=headers,
            )

            logger.info(f"payload='{request_payload}'")
            logger.info(f"response='{http_response.__dict__}'")
            http_responses.append(http_response)

        return http_responses
diff --git a/obsei_module/obsei-master/obsei/sink/elasticsearch_sink.py b/obsei_module/obsei-master/obsei/sink/elasticsearch_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ade40ac9957109d6b91912f9f8f9120bbcd3ea6
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/elasticsearch_sink.py
@@ -0,0 +1,101 @@
+from copy import deepcopy
+from typing import Any, Dict, List, Optional, Union
+
+from elasticsearch import Elasticsearch, RequestError
+from elasticsearch.helpers import bulk
+from pydantic import Field, PrivateAttr, SecretStr
+
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+from obsei.payload import TextPayload
+
+
class ElasticSearchSinkConfig(BaseSinkConfig):
    """Configuration for the Elasticsearch sink; builds the ES client and
    (optionally) creates the target index at construction time.
    """

    # This is done to avoid exposing member to API response
    _es_client: Elasticsearch = PrivateAttr()
    TYPE: str = "Elasticsearch"
    hosts: Union[str, List[str], None]
    index_name: str = "es_index"
    # Basic-auth credentials; fall back to elasticsearch_* env vars.
    username: SecretStr = Field(SecretStr(""), env="elasticsearch_username")
    password: SecretStr = Field(SecretStr(""), env="elasticsearch_password")
    ca_certs: str = Field("")
    verify_certs: bool = False
    # Create the index on startup (race-safe; see _create_index).
    create_index: bool = True
    timeout: int = 30
    # Full mapping body to use instead of the default keyword-only template.
    custom_mapping: Optional[Dict[str, Any]] = None
    # Refresh policy forwarded to the bulk helper.
    refresh_type: str = "wait_for"
    # Per-document bulk metadata; defaults to "create" ops on index_name.
    base_payload: Optional[Dict[str, Any]] = None

    def __init__(self, **data: Any):
        super().__init__(**data)
        self._es_client = Elasticsearch(
            hosts=self.hosts,
            http_auth=(
                self.username.get_secret_value(),
                self.password.get_secret_value(),
            ),
            ca_certs=self.ca_certs,
            verify_certs=self.verify_certs,
            timeout=self.timeout,
        )
        self.base_payload = self.base_payload or {
            "_op_type": "create",  # TODO update existing-document support?
            "_index": self.index_name,
        }
        if self.create_index:
            self._create_index(self.index_name)

    def _create_index(self, index_name: str) -> None:
        """Create the index with either the custom or the default mapping."""
        if self.custom_mapping:
            mapping = self.custom_mapping
        else:
            # Default: index every string field as a non-analyzed keyword.
            # NOTE(review): this dict wraps the body under a "mappings" key and
            # is then passed via the `mappings=` kwarg below, which looks like
            # double nesting ({"mappings": {"mappings": ...}}) — confirm
            # against the elasticsearch-py client version in use.
            mapping = {
                "mappings": {
                    "dynamic_templates": [
                        {
                            "strings": {
                                "path_match": "*",
                                "match_mapping_type": "string",
                                "mapping": {"type": "keyword"},
                            }
                        }
                    ],
                }
            }

        try:
            self._es_client.indices.create(index=index_name, mappings=mapping)
        except RequestError as e:
            # With multiple workers we need to avoid race conditions, where:
            # - there's no index in the beginning
            # - both want to create one
            # - one fails as the other one already created it
            if not self._es_client.indices.exists(index=index_name):
                raise e

    def bulk(self, payloads: List[Dict[str, Any]]) -> Any:
        """Submit the prepared documents in a single bulk call."""
        return bulk(
            self._es_client, payloads, request_timeout=300, refresh=self.refresh_type
        )
+
+
class ElasticSearchSink(BaseSink):
    """Sink that bulk-indexes converted payloads into Elasticsearch."""

    def __init__(self, convertor: Convertor = Convertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: ElasticSearchSinkConfig,
        **kwargs: Any
    ) -> Any:
        """Convert every response and submit them in one bulk request."""
        documents = [
            self.convertor.convert(
                analyzer_response=item,
                base_payload=deepcopy(config.base_payload),
            )
            for item in analyzer_responses
        ]
        return config.bulk(documents)
diff --git a/obsei_module/obsei-master/obsei/sink/http_sink.py b/obsei_module/obsei-master/obsei/sink/http_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d07845608180ac934e82cbdbb43dc747b7ce6ad
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/http_sink.py
@@ -0,0 +1,49 @@
+from copy import deepcopy
+from typing import Any, Dict, List, Optional
+from urllib.request import Request, urlopen
+
+from obsei.misc.utils import obj_to_json
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+from obsei.payload import TextPayload
+
# Fallback HTTP headers used when the sink config does not provide any.
DEFAULT_HEADERS = {"Content-type": "application/json"}
+
+
class HttpSinkConfig(BaseSinkConfig):
    """Configuration for the generic HTTP sink."""

    TYPE: str = "Http"
    # Endpoint the converted payloads are POSTed to.
    url: str
    # Optional request headers; DEFAULT_HEADERS is used when falsy.
    headers: Optional[Dict[str, Any]] = None
    # Static fields used as the base of every converted payload.
    base_payload: Optional[Dict[str, Any]] = None
    # analyzer_output to payload mapping
    payload_mapping: Optional[Dict[str, List[str]]] = None
    # NOTE(review): payload_mapping/field_conversion are not read by this
    # module's send_data — presumably consumed by a convertor; confirm.
    field_conversion: Optional[Dict[str, str]] = None
+
+
class HttpSink(BaseSink):
    """Generic sink that POSTs each converted payload to a configured URL."""

    def __init__(self, convertor: Convertor = Convertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self, analyzer_responses: List[TextPayload], config: HttpSinkConfig, **kwargs: Any
    ) -> Any:
        """Convert every response and send it to ``config.url``.

        Returns the raw urllib responses, one per payload.
        """
        headers = config.headers if config.headers else DEFAULT_HEADERS

        request_bodies = [
            self.convertor.convert(
                analyzer_response=item,
                base_payload=dict()
                if config.base_payload is None
                else deepcopy(config.base_payload),
            )
            for item in analyzer_responses
        ]

        responses = []
        for body in request_bodies:
            request = Request(config.url, data=obj_to_json(body), headers=headers)
            responses.append(urlopen(request))

        return responses
diff --git a/obsei_module/obsei-master/obsei/sink/jira_sink.py b/obsei_module/obsei-master/obsei/sink/jira_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6a8bef3c51c01024750354ef185c87b0802e49c
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/jira_sink.py
@@ -0,0 +1,106 @@
+import logging
+import textwrap
+from typing import Any, Dict, List, Optional
+
+from atlassian import Jira
+from pydantic import Field, PrivateAttr, SecretStr
+
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+from obsei.payload import TextPayload
+from obsei.misc.utils import obj_to_markdown
+
# Module-level logger for this sink.
logger = logging.getLogger(__name__)
+
+
class JiraPayloadConvertor(Convertor):
    """Convert a TextPayload into Jira issue fields (summary + description)."""

    def convert(
        self,
        analyzer_response: TextPayload,
        base_payload: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Build the Jira field dict.

        Recognized kwargs: ``summary_max_length`` caps the summary width
        (default 50); ``labels_count`` is accepted for future label support.
        """
        payload = base_payload if base_payload else dict()

        payload["description"] = obj_to_markdown(
            obj=analyzer_response,
            str_enclose_start="{quote}",
            str_enclose_end="{quote}",
        )
        payload["summary"] = textwrap.shorten(
            text=analyzer_response.processed_text,
            width=kwargs.get("summary_max_length", 50),
        )

        # TODO: Find correct payload to update labels fields
        # (the ``labels_count`` kwarg is reserved for that future use).

        return payload
+
+
class JiraSinkConfig(BaseSinkConfig):
    """Configuration for the Jira sink; builds a Jira client on construction.

    Raises:
        AttributeError: if username or password is missing.
    """

    # This is done to avoid exposing member to API response
    _jira_client: Jira = PrivateAttr()
    TYPE: str = "Jira"
    # Base URL of the Jira instance.
    url: str
    # Credentials; read from jira_username/jira_password env vars when unset.
    username: Optional[SecretStr] = Field(None, env="jira_username")
    password: Optional[SecretStr] = Field(None, env="jira_password")
    # Sent as the "issuetype" / "project" fields of every created issue.
    issue_type: Dict[str, str]
    project: Dict[str, str]
    # Forwarded to Jira's create_issue call.
    update_history: bool = True
    verify_ssl: bool = False
    # Maximum width of the generated issue summary.
    summary_max_length: int = 50
    labels_count: int = 2  # Number of labels to fetch

    def __init__(self, **data: Any):
        super().__init__(**data)
        # Fail fast: both credentials are mandatory for the Jira client.
        if self.username is None or self.password is None:
            raise AttributeError(
                "Jira informer need username and password"
            )

        self._jira_client = Jira(
            url=self.url,
            username=self.username.get_secret_value(),
            password=self.password.get_secret_value(),
            verify_ssl=self.verify_ssl,
        )

    def get_jira_client(self) -> Jira:
        """Return the Jira client created at construction time."""
        return self._jira_client
+
+
class JiraSink(BaseSink):
    """Sink that creates one Jira issue per analyzer response."""

    def __init__(self, convertor: Convertor = JiraPayloadConvertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: JiraSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Convert each response to Jira fields and create the issues.

        Returns the Jira API response for every created issue.
        """
        issue_fields = [
            self.convertor.convert(
                analyzer_response=item,
                base_payload={
                    "project": config.project,
                    "issuetype": config.issue_type,
                },
                summary_max_length=config.summary_max_length,
                labels_count=config.labels_count,
            )
            for item in analyzer_responses
        ]

        responses = []
        for fields in issue_fields:
            created = config.get_jira_client().create_issue(
                fields=fields, update_history=config.update_history
            )
            logger.info(f"response='{created}'")
            responses.append(created)

        return responses
diff --git a/obsei_module/obsei-master/obsei/sink/logger_sink.py b/obsei_module/obsei-master/obsei/sink/logger_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..970da21b61d3ee06221f128b5972ede0a323d1fb
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/logger_sink.py
@@ -0,0 +1,39 @@
+import logging
+from logging import Logger
+from typing import Any, List, Optional
+
+from pydantic import Field
+
+from obsei.payload import TextPayload
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+
+
class LoggerSinkConfig(BaseSinkConfig):
    """Configuration for the logging sink."""

    TYPE: str = "Logging"
    # Destination logger; defaults to this module's logger.
    logger: Logger = Field(logging.getLogger(__name__))
    # Log level used for every emitted record.
    level: int = Field(logging.INFO)
+
+
class LoggerSink(BaseSink):
    """Sink that writes each converted payload to a configured logger."""

    TYPE: str = "Logging"

    def __init__(self, convertor: Convertor = Convertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: LoggerSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Convert every response, then log each one at ``config.level``."""
        converted = [
            self.convertor.convert(analyzer_response=item)
            for item in analyzer_responses
        ]

        for entry in converted:
            # Prefer the attribute dict when the entry is an object.
            printable = vars(entry) if hasattr(entry, "__dict__") else entry
            config.logger.log(level=config.level, msg=f"{printable}")
diff --git a/obsei_module/obsei-master/obsei/sink/pandas_sink.py b/obsei_module/obsei-master/obsei/sink/pandas_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..98f430faf95d2129c515f5e4d51de6cdd74971c8
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/pandas_sink.py
@@ -0,0 +1,65 @@
+from typing import Any, Dict, List, Optional
+
+from pandas import DataFrame
+
+from obsei.payload import TextPayload
+from obsei.misc.utils import flatten_dict
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+
+
class PandasConvertor(Convertor):
    """Flatten a TextPayload (plus optional base payload) into a flat dict."""

    def convert(
        self,
        analyzer_response: TextPayload,
        base_payload: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Merge the base payload with the response dict and flatten the result.

        Response fields win over base-payload fields on key collisions.
        """
        combined: Dict[str, Any] = dict(base_payload or {})
        combined.update(analyzer_response.to_dict())
        return flatten_dict(combined)
+
+
class PandasSinkConfig(BaseSinkConfig):
    """Configuration for the pandas sink."""

    TYPE: str = "Pandas"
    # Target DataFrame; a fresh empty one is created when not supplied.
    dataframe: Optional[DataFrame] = None
    # By default it will include all the columns
    include_columns_list: Optional[List[str]] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        if self.dataframe is None:
            self.dataframe = DataFrame()
+
+
class PandasSink(BaseSink):
    """Sink that accumulates converted payloads as rows of a pandas DataFrame."""

    TYPE: str = "Pandas"

    def __init__(self, convertor: Convertor = PandasConvertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: PandasSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Convert responses to flat dicts, optionally filter columns, and
        append them as rows to ``config.dataframe``.

        Returns the updated DataFrame (also stored back on the config).
        """
        # Local import keeps the module-level import block unchanged.
        from pandas import concat

        responses = []
        for analyzer_response in analyzer_responses:
            converted_response = self.convertor.convert(
                analyzer_response=analyzer_response
            )
            response: Optional[Dict[str, Any]] = None
            if config.include_columns_list:
                # Keep only the explicitly requested columns.
                response = {
                    key: value
                    for key, value in converted_response.items()
                    if key in config.include_columns_list
                }
            else:
                response = converted_response
            responses.append(response)

        if config.dataframe is not None:
            # BUG FIX: DataFrame.append() was deprecated in pandas 1.4 and
            # removed in 2.0; concat with a DataFrame built from the new rows
            # is the supported replacement.
            config.dataframe = concat(
                [config.dataframe, DataFrame(responses)], ignore_index=True
            )

        return config.dataframe
diff --git a/obsei_module/obsei-master/obsei/sink/slack_sink.py b/obsei_module/obsei-master/obsei/sink/slack_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad613c9f4c21cd42013259d74f07bcb580ae1ced
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/slack_sink.py
@@ -0,0 +1,71 @@
+import json
+import logging
+from typing import Any, List, Optional
+
+from jinja2 import Template
+from pydantic import Field, PrivateAttr, SecretStr
+from slack_sdk import WebClient
+
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig
+from obsei.payload import TextPayload
+
# Module-level logger for this sink.
logger = logging.getLogger(__name__)
+
+
class SlackSinkConfig(BaseSinkConfig):
    """Configuration for the Slack sink; builds a WebClient on construction.

    Raises:
        AttributeError: if slack_token or channel_id is missing.
    """

    # This is done to avoid exposing member to API response
    _slack_client: WebClient = PrivateAttr()
    TYPE: str = "Slack"

    # Bot token; read from the slack_token env var when unset.
    slack_token: Optional[SecretStr] = Field(None, env="slack_token")
    # Channel to post into; read from the slack_channel_id env var when unset.
    channel_id: str = Field("", env="slack_channel_id")
    # Optional Jinja2 template used to render each payload into a message.
    jinja_template: Optional[str] = None
    # Avatar shown next to the posted message.
    icon_url: str = "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/obsei_200x200.png"
    is_markdown: bool = True

    def __init__(self, **data: Any):
        super().__init__(**data)
        # Fail fast: both token and channel are mandatory.
        if self.slack_token is None or self.channel_id == '':
            raise AttributeError(
                "Slack informer need slack_token and channel_id"
            )

        self._slack_client = WebClient(token=self.slack_token.get_secret_value())

    def get_slack_client(self) -> WebClient:
        """Return the WebClient created at construction time."""
        return self._slack_client
+
+
class SlackSink(BaseSink):
    """Sink that posts each converted payload as a Slack message."""

    def __init__(self, **data: Any):
        super().__init__(**data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: SlackSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Render each payload (via the optional Jinja template) and post it
        to the configured Slack channel.

        Returns the Slack API response for every posted message.
        """
        responses = []
        payloads = []
        for analyzer_response in analyzer_responses:
            payloads.append(self.convertor.convert(analyzer_response=analyzer_response))

        for payload in payloads:
            if config.jinja_template is not None:
                template = Template(config.jinja_template)
                message = template.render(payload=payload)
            else:
                # BUG FIX: the two f-strings must form one parenthesized
                # expression. Previously the second literal was a standalone
                # no-op statement, so the segmented data was silently dropped
                # from the message.
                message = (
                    f'Message: `{str(payload["processed_text"])}` '
                    f'```{json.dumps(payload["segmented_data"], indent=2, ensure_ascii=False)}```'
                )

            response = config.get_slack_client().chat_postMessage(
                channel=config.channel_id,
                text=message,
                icon_url=config.icon_url,
                mrkdwn=config.is_markdown,
            )
            logger.info(f"response='{response}'")
            responses.append(response)

        return responses
diff --git a/obsei_module/obsei-master/obsei/sink/zendesk_sink.py b/obsei_module/obsei-master/obsei/sink/zendesk_sink.py
new file mode 100644
index 0000000000000000000000000000000000000000..2992b6139fa75f83b58925127a1472aeb8b9cdd5
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/sink/zendesk_sink.py
@@ -0,0 +1,148 @@
+import json
+import logging
+import textwrap
+from copy import deepcopy
+
+import requests
+from typing import Any, Dict, List, Mapping, Optional
+
+from pydantic import BaseModel, Field, SecretStr
+
+from obsei.sink.base_sink import BaseSink, BaseSinkConfig, Convertor
+from obsei.payload import TextPayload
+from obsei.misc.utils import obj_to_markdown
+
# Module-level logger for this sink.
logger = logging.getLogger(__name__)
+
+
class ZendeskPayloadConvertor(Convertor):
    """Convert a TextPayload into a Zendesk "create ticket" request body."""

    # Refer https://developer.zendesk.com/api-reference/ticketing/tickets/tickets/#create-ticket
    # for the payload details
    def convert(
        self,
        analyzer_response: TextPayload,
        base_payload: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Build ``{"ticket": {...}}``.

        Recognized kwargs: ``summary_max_length`` (subject width, default 50)
        and ``labels_count`` (number of tags, default 1).
        """
        summary_max_length = kwargs.get("summary_max_length", 50)

        payload = base_payload or dict()

        if "ticket" not in payload:
            payload["ticket"] = dict()

        if "comment" not in payload["ticket"]:
            payload["ticket"]["comment"] = dict()

        # BUG FIX: the Zendesk API expects the comment body, subject and tags
        # nested inside the "ticket" object; previously they were written at
        # the top level of the payload and the prepared "comment" dict was
        # left empty.
        # For non-html content, use the "body" key instead of "html_body".
        payload["ticket"]["comment"]["html_body"] = obj_to_markdown(
            obj=analyzer_response,
            str_enclose_start="{quote}",
            str_enclose_end="{quote}",
        )

        payload["ticket"]["subject"] = textwrap.shorten(
            text=analyzer_response.processed_text, width=summary_max_length
        )

        # Use up to labels_count segmented-data values as ticket tags.
        if analyzer_response.segmented_data is not None and isinstance(
            analyzer_response.segmented_data, Mapping
        ):
            labels_count = kwargs.get("labels_count", 1)
            labels = [
                str(v)
                for k, v in analyzer_response.segmented_data.items()
            ]
            payload["ticket"]["tags"] = labels[:labels_count]

        return payload
+
+
class ZendeskCredInfo(BaseModel):
    """Zendesk credentials: OAuth token, API token or email+password.

    NOTE(review): ``env=`` on a plain BaseModel field is not read from the
    environment (only BaseSettings does that) — confirm intended.
    """

    email: Optional[str] = Field(None, env="zendesk_email")
    password: Optional[SecretStr] = Field(None, env="zendesk_password")
    oauth_token: Optional[SecretStr] = Field(None, env="zendesk_oauth_token")
    token: Optional[SecretStr] = Field(None, env="zendesk_token")

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Guard clauses: require at least one credential, and forbid the
        # ambiguous password + token combination.
        if not (self.oauth_token or self.token or self.email or self.password):
            raise ValueError("At least one credential is required")

        if self.password and self.token:
            raise ValueError("Only one of password or token can be provided")

    def get_session(self) -> requests.Session:
        """Build a requests session carrying the strongest available credential."""
        session = requests.Session()

        if self.oauth_token:
            bearer = self.oauth_token.get_secret_value()
            session.headers.update({"Authorization": f'Bearer {bearer}'})
        elif self.email and self.token:
            session.auth = (f'{self.email}/token', self.token.get_secret_value())
        elif self.email and self.password:
            session.auth = (self.email, self.password.get_secret_value())

        return session
+
+
class ZendeskSinkConfig(BaseSinkConfig):
    """Configuration for the Zendesk sink.

    The ticket endpoint is assembled from scheme, optional subdomain, domain
    and API path, e.g. ``https://mycompany.zendesk.com/api/v2/tickets.json``.
    """

    TYPE: str = "Zendesk"
    # REST path for ticket creation.
    ticket_api: str = Field(default="/api/v2/tickets.json")
    scheme: str = Field(default="https", env="zendesk_scheme")
    domain: str = Field(default="zendesk.com", env="zendesk_domain")
    # Account subdomain, e.g. "mycompany" for mycompany.zendesk.com.
    subdomain: Optional[str] = Field(None, env="zendesk_subdomain")
    # Credentials; built from environment variables when not supplied.
    cred_info: Optional[ZendeskCredInfo] = Field(None)
    # Maximum width of the generated ticket subject.
    summary_max_length: int = 50
    labels_count: int = 3  # Number of labels to fetch
    # Static fields merged into every converted payload.
    base_payload: Optional[Dict[str, Any]] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        self.cred_info = self.cred_info or ZendeskCredInfo()

    def get_endpoint(self) -> str:
        """Return the full ticket-creation URL."""
        # BUG FIX: the prefix previously started with "/", producing a
        # malformed URL like "https:///sub.zendesk.com/..."; the subdomain
        # must be joined as "<subdomain>." directly after "scheme://".
        sub_prefix = "" if self.subdomain is None or self.subdomain == '' else f"{self.subdomain}."
        return f'{self.scheme}://{sub_prefix}{self.domain}{self.ticket_api}'
+
+
class ZendeskSink(BaseSink):
    """Sink that creates one Zendesk ticket per analyzer response."""

    def __init__(self, convertor: Convertor = ZendeskPayloadConvertor(), **data: Any):
        super().__init__(convertor=convertor, **data)

    def send_data(  # type: ignore[override]
        self,
        analyzer_responses: List[TextPayload],
        config: ZendeskSinkConfig,
        **kwargs: Any,
    ) -> Any:
        """Convert each response into a ticket payload and POST it to Zendesk.

        Returns the HTTP responses (empty list when credentials are missing).
        """
        responses: List[Any] = []
        payloads: List[Dict[str, Any]] = []

        if config.cred_info is None:
            logger.error("Zendesk credentials are not provided")
            return responses

        for analyzer_response in analyzer_responses:
            payloads.append(
                self.convertor.convert(
                    analyzer_response=analyzer_response,
                    base_payload=dict()
                    if config.base_payload is None
                    else deepcopy(config.base_payload),
                    summary_max_length=config.summary_max_length,
                    labels_count=config.labels_count,
                )
            )

        for payload in payloads:
            session = config.cred_info.get_session()
            # BUG FIX: send the converted ticket payload itself. The previous
            # code posted json.dumps(payload["segmented_data"], ...): the
            # convertor never sets a "segmented_data" key (KeyError), and
            # passing a pre-serialized string via ``json=`` double-encodes
            # the body.
            response = session.post(config.get_endpoint(), json=payload)
            logger.info(f"response='{response}'")
            responses.append(response)

        return responses
diff --git a/obsei_module/obsei-master/obsei/source/__init__.py b/obsei_module/obsei-master/obsei/source/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/source/__pycache__/website_crawler_source.cpython-311.pyc b/obsei_module/obsei-master/obsei/source/__pycache__/website_crawler_source.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2d6642cbe8b71fd8a91a71c7fcc9df589961c96
Binary files /dev/null and b/obsei_module/obsei-master/obsei/source/__pycache__/website_crawler_source.cpython-311.pyc differ
diff --git a/obsei_module/obsei-master/obsei/source/appstore_scrapper.py b/obsei_module/obsei-master/obsei/source/appstore_scrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f180aa23c4de4de307c85a54418ff5334d7be2c8
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/appstore_scrapper.py
@@ -0,0 +1,150 @@
+import logging
+import re
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Tuple
+from urllib import parse
+
+from app_store.app_store_reviews_reader import AppStoreReviewsReader
+
+from obsei.misc.web_search import perform_search
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.payload import TextPayload
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+)
+
# Module-level logger for this source.
logger = logging.getLogger(__name__)
+
+
class AppStoreScrapperConfig(BaseSourceConfig):
    """Configuration for the App Store review scrapper source.

    One of ``app_url``, ``app_id`` or ``app_name`` must be provided; the
    remaining fields are derived when possible.

    Raises:
        ValueError: when no app id can be resolved.
    """

    TYPE: str = "AppStoreScrapper"
    # Full store URL, e.g. https://apps.apple.com/us/app/<name>/id<digits>.
    app_url: Optional[str] = None
    # Country codes to scrape; defaults to ["us"].
    countries: Optional[List[str]] = None
    app_id: Optional[str] = None
    app_name: Optional[str] = None
    # Relative period (e.g. "1h") or absolute datetime string.
    lookup_period: Optional[str] = None
    # Cap on the number of reviews fetched per country.
    max_count: Optional[int] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Derive id/countries/name from the URL, or search by name as fallback.
        if self.app_url is not None:
            self.app_id, self.countries, self.app_name = AppStoreScrapperConfig.parse_app_url(self.app_url)
        else:
            if not self.app_id and self.app_name:
                self.app_id = AppStoreScrapperConfig.search_id(self.app_name)

        if not self.app_id:
            # BUG FIX: the message referred to `package_name`, a Play Store
            # config field that does not exist here.
            raise ValueError("Valid `app_id`, `app_name` or `app_url` is mandatory")

        self.countries = self.countries or ["us"]
        self.app_name = self.app_name or self.app_id

    @classmethod
    def parse_app_url(cls, app_url: str) -> Tuple[Optional[str], Optional[List[str]], Optional[str]]:
        """Extract ``(app_id, [country], app_name)`` from an App Store URL.

        Expects the path shape ``/<country>/app/<name>/id<digits>``; returns
        Nones when the URL does not match.
        """
        parsed_url = parse.urlparse(app_url)
        url_paths = parsed_url.path.split("/")

        countries = app_name = app_id = None
        if len(url_paths) == 5:
            countries = [url_paths[1]]
            app_name = url_paths[3]
            app_ids = url_paths[4].split("id")
            app_id = None if len(app_ids) != 2 else app_ids[1]

        return app_id, countries, app_name

    # Code is influenced from https://github.com/cowboy-bebug/app-store-scraper
    @classmethod
    def search_id(cls, app_name: str, store: str = "app") -> str:
        """Resolve an app id by web-searching the store landing pages.

        Raises:
            RuntimeError: when no ``id<digits>`` pattern appears in the result.
        """
        if store == "app":
            landing_url = "apps.apple.com"
            request_host = "amp-api.apps.apple.com"
        else:
            landing_url = "podcasts.apple.com"
            request_host = "amp-api.podcasts.apple.com"

        base_request_url = f"https://{request_host}"
        search_response = perform_search(
            request_url=base_request_url, query=f"app store {app_name}"
        )

        pattern = fr"{landing_url}/[a-z]{{2}}/.+?/id([0-9]+)"
        match_object = re.search(pattern, search_response.text)
        if match_object:
            app_id = str(match_object.group(1))
        else:
            raise RuntimeError("Pattern matching is not found")
        return app_id
+
+
class AppStoreScrapperSource(BaseSource):
    """Source that fetches App Store reviews newer than a stored checkpoint.

    Per-country state keeps the newest review time ("since_time") and newest
    review id ("since_id") so repeated lookups only return new reviews.
    """

    NAME: Optional[str] = "AppStoreScrapper"

    def lookup(self, config: AppStoreScrapperConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch reviews for every configured country as TextPayloads.

        Pass ``id=<workflow id>`` in kwargs to persist lookup state in the store.
        """
        source_responses: List[TextPayload] = []

        # Get data from state
        identifier: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = True if identifier else False
        state = state or dict()

        if config.countries is None or len(config.countries) == 0:
            logger.warning("`countries` in config should not be empty or None")
            return source_responses

        for country in config.countries:
            country_stat: Dict[str, Any] = state.get(country, dict())
            # Prefer the per-country checkpoint over the configured period.
            lookup_period: str = country_stat.get("since_time", config.lookup_period)
            lookup_period = lookup_period or DEFAULT_LOOKUP_PERIOD
            # Short values (e.g. "1h") are relative periods; otherwise an
            # absolute datetime string is expected.
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)
            since_time = since_time.replace(tzinfo=timezone.utc)

            last_since_time: datetime = since_time

            since_id: Optional[int] = country_stat.get("since_id", None)
            last_index = since_id
            state[country] = country_stat

            scrapper = AppStoreReviewsReader(country=country, app_id=config.app_id)
            reviews = scrapper.fetch_reviews(after=since_time, since_id=since_id)
            reviews = reviews or []
            if config.max_count is not None and config.max_count < len(reviews):
                reviews = reviews[: config.max_count]

            for review in reviews:
                source_responses.append(
                    TextPayload(
                        processed_text=f"{review.title}. {review.content}",
                        meta=vars(review) if hasattr(review, "__dict__") else review,
                        source_name=self.NAME,
                    )
                )

                review_time = review.date.replace(tzinfo=timezone.utc)
                # NOTE(review): a review older than since_time is appended
                # above *before* this cutoff triggers — confirm intended.
                if review_time < since_time:
                    break
                if last_since_time is None or last_since_time < review_time:
                    last_since_time = review_time
                if last_index is None or last_index < review.id:
                    last_index = review.id

            # Persist the newest review time/id as the next checkpoint.
            country_stat["since_time"] = last_since_time.strftime(
                DATETIME_STRING_PATTERN
            )
            country_stat["since_id"] = last_index

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/base_source.py b/obsei_module/obsei-master/obsei/source/base_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd2312cf4ef65a0ef58ad21efda401502a99f1c0
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/base_source.py
@@ -0,0 +1,25 @@
+from abc import abstractmethod
+from typing import List, Optional, Any
+
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from obsei.workflow.base_store import BaseStore
+
+
class BaseSourceConfig(BaseSettings):
    """Base class for all source configurations; TYPE discriminates subclasses."""

    TYPE: str = "Base"

    class Config:
        # Allow non-pydantic types (clients, stores) as field values.
        arbitrary_types_allowed = True
+
class BaseSource(BaseSettings):
    """Abstract base for all sources; subclasses implement :meth:`lookup`."""

    # Optional state store used to checkpoint lookups between runs.
    store: Optional[BaseStore] = None

    @abstractmethod
    def lookup(self, config: BaseSourceConfig, **kwargs: Any) -> List[TextPayload]:
        """Fetch the data described by ``config`` and return it as TextPayloads."""
        pass

    class Config:
        # Allow non-pydantic types (e.g. the store) as field values.
        arbitrary_types_allowed = True
diff --git a/obsei_module/obsei-master/obsei/source/email_source.py b/obsei_module/obsei-master/obsei/source/email_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..907463b6e41357b60dded8989b13a04e82a4dcb9
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/email_source.py
@@ -0,0 +1,288 @@
+import email
+import imaplib
+import logging
+from datetime import datetime
+from email.header import decode_header
+from email.message import Message
+from typing import Any, Dict, List, Optional
+
+import pytz
+from pydantic import Field, PrivateAttr, SecretStr
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+ text_from_html,
+)
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
# Module-level logger for this source.
logger = logging.getLogger(__name__)
+
+
class EmailCredInfo(BaseSettings):
    """IMAP credentials; fall back to email_username/email_password env vars."""

    username: Optional[SecretStr] = Field(None, env="email_username")
    password: Optional[SecretStr] = Field(None, env="email_password")
+
+
class EmailConfig(BaseSourceConfig):
    """Configuration for the email source; logs into IMAP over SSL at
    construction time.

    Raises:
        ValueError: when username or password cannot be resolved.
    """

    # This is done to avoid exposing member to API response
    _imap_client: imaplib.IMAP4 = PrivateAttr()
    TYPE: str = "Email"
    # List of IMAP servers for most commonly used email providers
    # https://www.systoolsgroup.com/imap/
    # Also, if you're using a Gmail account then make sure you allow less secure apps on your account -
    # https://myaccount.google.com/lesssecureapps?pli=1
    # Also enable IMAP access -
    # https://mail.google.com/mail/u/0/#settings/fwdandpop
    imap_server: str
    imap_port: Optional[int] = None
    # NOTE(review): declared but not consulted by lookup() — attachment
    # download is currently unsupported there; confirm before relying on it.
    download_attachments: Optional[bool] = False
    mailboxes: List[str] = Field(["INBOX"])
    cred_info: Optional[EmailCredInfo] = Field(None)
    # Relative period (e.g. "1h") or absolute datetime string.
    lookup_period: Optional[str] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        self.cred_info = self.cred_info or EmailCredInfo()

        if self.cred_info.password is None or self.cred_info.username is None:
            raise ValueError("Email account `username` and `password` is required")
        # Always connect over SSL; imaplib's default port is used when unset.
        if self.imap_port:
            self._imap_client = imaplib.IMAP4_SSL(
                host=self.imap_server, port=self.imap_port
            )
        else:
            self._imap_client = imaplib.IMAP4_SSL(self.imap_server)

        self._imap_client.login(
            user=self.cred_info.username.get_secret_value(),
            password=self.cred_info.password.get_secret_value(),
        )

    def __del__(self) -> None:
        # NOTE(review): if __init__ failed before the client was created, this
        # raises AttributeError during GC; logout() may also fail on a dropped
        # connection — consider guarding.
        # self._imap_client.close()
        self._imap_client.logout()

    def get_client(self) -> imaplib.IMAP4:
        """Return the logged-in IMAP client created at construction time."""
        return self._imap_client
+
+
class EmailSource(BaseSource):
    """Source that fetches emails over IMAP and converts them to TextPayloads.

    Per-mailbox state keeps the newest message time ("since_time") and newest
    Message-ID ("since_message_id") so repeated lookups only return new mail.
    """

    NAME: str = "Email"

    @staticmethod
    def clean(text: str) -> str:
        # clean text for creating a folder
        return "".join(c if c.isalnum() else "_" for c in text)

    def lookup(self, config: EmailConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch new emails from every configured mailbox, newest first.

        Pass ``id=<workflow id>`` in kwargs to persist lookup state in the store.
        """
        source_responses: List[TextPayload] = []

        # Get data from state (local renamed so the `id` builtin is not shadowed).
        identifier: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = True if identifier else False
        state = state or dict()

        imap_client = config.get_client()

        for mailbox in config.mailboxes:
            need_more_lookup = True

            status, messages = imap_client.select(mailbox=mailbox, readonly=True)
            if status != "OK":
                logger.warning(f"Not able to connect with {mailbox}: {status}")
                continue

            mailbox_stat: Dict[str, Any] = state.get(mailbox, dict())
            lookup_period: str = mailbox_stat.get(
                "since_time", config.lookup_period or DEFAULT_LOOKUP_PERIOD
            )
            # Short values (e.g. "1h") are relative periods; otherwise an
            # absolute datetime string is expected.
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

            # Normalize to UTC so comparisons with parsed email dates are valid.
            if since_time.tzinfo is None:
                since_time = since_time.replace(tzinfo=pytz.utc)
            else:
                since_time = since_time.astimezone(pytz.utc)

            last_since_time: datetime = since_time
            # Message-IDs are strings (RFC 5322), not ints.
            since_id: Optional[str] = mailbox_stat.get("since_message_id", None)
            last_index = since_id

            state[mailbox] = mailbox_stat

            # BUG FIX: select() returns the message count as bytes (e.g.
            # b'42'); int() accepts bytes directly, whereas the previous
            # int(str(messages[0])) evaluated int("b'42'") and always raised
            # ValueError.
            num_of_emails = int(messages[0])

            # Read in reverse order means latest emails first
            # Most of the code is borrowed from
            # https://www.thepythoncode.com/article/reading-emails-in-python
            # and modified to suit here.
            for index in range(num_of_emails, 0, -1):
                email_meta: Dict[str, Any] = dict()

                # fetch the email message by ID
                status, email_message = imap_client.fetch(str(index), "(RFC822)")

                email_content: str = ""

                for response in email_message:
                    if isinstance(response, tuple):
                        # parse a bytes email into a message object
                        msg = email.message_from_bytes(response[1])

                        email_meta["subject"] = self._parse_email_header(msg, "Subject")
                        email_meta["from_address"] = self._parse_email_header(
                            msg, "From"
                        )
                        email_meta["to_address"] = self._parse_email_header(msg, "To")
                        date_received_str = self._parse_email_header(msg, "Date")

                        # Try the common RFC 2822 date variants in turn.
                        try:
                            date_received = datetime.strptime(
                                date_received_str, "%a, %d %b %Y %H:%M:%S %Z"
                            )
                        except Exception:
                            try:
                                date_received = datetime.strptime(
                                    date_received_str, "%a, %d %b %Y %H:%M:%S %z"
                                )
                            except Exception:
                                date_received = datetime.strptime(
                                    date_received_str, "%a, %d %b %Y %H:%M:%S %z (%Z)"
                                )

                        # Normalize to UTC before comparing with since_time.
                        if date_received.tzinfo is None:
                            date_received = date_received.replace(tzinfo=pytz.utc)
                        else:
                            date_received = date_received.astimezone(pytz.utc)
                        email_meta["date_received"] = date_received
                        email_meta["message_id"] = self._parse_email_header(
                            msg, "Message-ID"
                        )

                        part_id = 0
                        # if the email message is multipart
                        if msg.is_multipart():
                            # iterate over email parts
                            for part in msg.walk():
                                part_id_str = f"part_{part_id}"
                                # extract content type of email
                                content_type = part.get_content_type()
                                content_disposition = str(
                                    part.get("Content-Disposition")
                                )

                                email_meta[part_id_str] = dict()
                                email_meta[part_id_str]["content_type"] = content_type
                                email_meta[part_id_str][
                                    "content_disposition"
                                ] = content_disposition

                                if (
                                    "attachment" not in content_disposition
                                    and "text/" in content_type
                                ):
                                    try:
                                        # get the email body
                                        email_body = part.get_payload(
                                            decode=True
                                        ).decode()
                                        if content_type == "text/html":
                                            email_body = text_from_html(email_body)
                                        # append email body with existing
                                        email_meta[part_id_str][
                                            "email_body"
                                        ] = email_body
                                        email_content = (
                                            email_content + "\n" + email_body
                                        )
                                    except Exception:
                                        logger.error("Unable to parse email body")
                                elif "attachment" in content_disposition:
                                    logger.warning(
                                        "Email attachment download is not supported"
                                    )
                                    # Download attachment is commented currently
                                    # # download attachment
                                    # filename = part.get_filename()
                                    # if filename:
                                    #     folder_name = self.clean(subject)
                                    #     if not os.path.isdir(folder_name):
                                    #         # make a folder for this email (named after the subject)
                                    #         os.mkdir(folder_name)
                                    #     filepath = os.path.join(folder_name, filename)
                                    #     # download attachment and save it
                                    #     open(filepath, "wb").write(part.get_payload(decode=True))

                                part_id = part_id + 1
                        else:
                            part_id_str = f"part_{part_id}"
                            email_meta[part_id_str] = dict()
                            # extract content type of email
                            content_type = msg.get_content_type()
                            email_meta[part_id_str]["content_type"] = content_type

                            # get the email body
                            email_body = msg.get_payload(decode=True).decode()
                            if content_type == "text/html":
                                email_body = text_from_html(email_body)

                            email_meta[part_id_str]["email_body"] = email_body
                            email_content = email_content + "\n" + email_body

                        # Stop once messages are older than the checkpoint or
                        # the previously recorded Message-ID is reached.
                        if date_received <= since_time:
                            need_more_lookup = False
                            break
                        if last_index and last_index == email_meta["message_id"]:
                            need_more_lookup = False
                            break
                        if last_since_time is None or last_since_time < date_received:
                            last_since_time = date_received
                        if last_index is None:
                            # First (newest) message seen becomes the new id checkpoint.
                            last_index = email_meta["message_id"]

                        source_responses.append(
                            TextPayload(
                                processed_text="\n".join(
                                    [email_meta.get("subject", ""), email_content]
                                ),
                                meta=email_meta,
                                source_name=self.NAME,
                            )
                        )

                if not need_more_lookup:
                    break

            mailbox_stat["since_time"] = last_since_time.strftime(
                DATETIME_STRING_PATTERN
            )
            # BUG FIX: persist under "since_message_id" — the key read at the
            # top of this loop. It was previously written as
            # "since_comment_id", so the Message-ID checkpoint never survived
            # between lookups.
            mailbox_stat["since_message_id"] = last_index

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses

    @staticmethod
    def _email_cleanup(content: str):  # type: ignore[no-untyped-def]
        # TODO: Implement the method to cleanup email contents
        pass

    @staticmethod
    def _parse_email_header(header: Message, key: str) -> str:
        """Decode a single email header value to str.

        NOTE(review): assumes the header exists — ``header[key]`` is None for
        a missing header and decode_header(None) would raise; also returns ""
        for raw bytes without a declared encoding. Confirm callers tolerate
        both.
        """
        value, encoding = decode_header(header[key])[0]
        if isinstance(value, bytes):
            # if it's a bytes, decode to str
            return "" if not encoding else value.decode(encoding)
        return str(value)
diff --git a/obsei_module/obsei-master/obsei/source/facebook_source.py b/obsei_module/obsei-master/obsei/source/facebook_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..84416f95a321358369bb142fd45e0963180c20e3
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/facebook_source.py
@@ -0,0 +1,177 @@
+import logging
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field, PrivateAttr
+from pydantic.types import SecretStr
+from pydantic_settings import BaseSettings
+from pyfacebook import FacebookApi
+
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+ obj_to_json,
+ convert_datetime_str_to_epoch,
+)
+from obsei.payload import TextPayload
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
+logger = logging.getLogger(__name__)
+
+
class FacebookCredentials(BaseSettings):
    """Facebook app credentials, optionally sourced from the environment.

    Either a long-lived user token, or an app id + secret pair (for
    application-only auth), must be provided (enforced by FacebookSourceConfig).

    NOTE(review): ``env=`` inside ``Field()`` is pydantic-v1 style; under
    pydantic_settings v2 it is treated as inert extra metadata — confirm the
    environment-variable mapping works with the pinned pydantic version.
    """

    app_id: Optional[SecretStr] = Field(None, env="facebook_app_id")
    app_secret: Optional[SecretStr] = Field(None, env="facebook_app_secret")
    long_term_token: Optional[SecretStr] = Field(None, env="facebook_long_term_token")
+
+
class FacebookSourceConfig(BaseSourceConfig):
    """Configuration for the Facebook source.

    Builds a ``pyfacebook.FacebookApi`` client at init time: a long-lived user
    token takes precedence; otherwise app id + secret are used in
    application-only mode.
    """

    # PrivateAttr keeps the client object out of serialized API responses
    _api_client: FacebookApi = PrivateAttr()
    TYPE: str = "Facebook"
    # Page to read posts from; `post_ids` (when set) restricts lookup to those posts
    page_id: str
    post_ids: Optional[List[str]] = None
    # Relative period ("1d", "12h") or absolute datetime string — see FacebookSource.lookup
    lookup_period: Optional[str] = None
    max_post: Optional[int] = 50
    cred_info: Optional[FacebookCredentials] = Field(None)

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Fall back to environment-backed credentials when none are supplied
        self.cred_info = self.cred_info or FacebookCredentials()

        # Long-lived token implies user auth; otherwise require app id + secret
        if self.cred_info.long_term_token is not None:
            application_only_auth = False
        elif self.cred_info.app_id is not None and self.cred_info.app_secret is not None:
            application_only_auth = True
        else:
            raise AttributeError("`app_id`, `app_secret` and `long_term_token` required to connect to Facebook")

        self._api_client = FacebookApi(
            app_id=self.cred_info.app_id.get_secret_value() if self.cred_info.app_id else None,
            app_secret=self.cred_info.app_secret.get_secret_value() if self.cred_info.app_secret else None,
            access_token=self.cred_info.long_term_token.get_secret_value() if self.cred_info.long_term_token else None,
            application_only_auth=application_only_auth,
        )

    def get_client(self) -> FacebookApi:
        """Return the FacebookApi client built at construction time."""
        return self._api_client
+
+
class FacebookSource(BaseSource):
    """Source that fetches recent posts of a Facebook page and emits their comments."""

    NAME: str = "Facebook"

    def lookup(self, config: FacebookSourceConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch comments newer than the stored checkpoint.

        :param config: page/post selection and API credentials.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per fetched comment.
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state when a state store is configured
        identifier: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = True if identifier else False
        state = state or dict()
        since_timestamp: Optional[int] = state.get("since_timestamp", None)
        if since_timestamp is None:
            lookup_period = config.lookup_period or DEFAULT_LOOKUP_PERIOD
            # Short values ("1d", "12h", ...) are relative periods; longer
            # values are absolute datetimes in DATETIME_STRING_PATTERN format
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

            since_timestamp = int(since_time.timestamp())
        self.log_object("Since: ", str(datetime.fromtimestamp(since_timestamp)))
        post_last_since_time = since_timestamp

        api = config.get_client()
        post_ids = config.post_ids
        if not post_ids:
            # No explicit post list configured: discover recent page posts
            posts = api.page.get_posts(
                page_id=config.page_id,
                count=config.max_post,
                since_time=str(since_timestamp),
                return_json=True,
            )
            self.log_object("Posts: ", str(posts))
            post_ids = []
            for post in posts:
                post_update_time = convert_datetime_str_to_epoch(post["updated_time"])
                if post_update_time is not None:
                    # Posts arrive newest-first; stop at the first stale one
                    if post_update_time < since_timestamp:
                        break

                    if (
                        post_last_since_time is None
                        or post_last_since_time < post_update_time
                    ):
                        post_last_since_time = post_update_time
                else:
                    # stdlib logging uses %-style placeholders; the original
                    # "{}" placeholder was emitted verbatim instead of the value
                    logger.warning("Unable to parse post update time: %s", post["updated_time"])

                post_ids.append(post["id"])

        for post_id in post_ids:
            # Collect (and later persist) the per-post checkpoint
            post_stat: Dict[str, Any] = state.get(post_id, dict())
            state[post_id] = post_stat

            # Read the per-post checkpoint; the original read the page-level
            # `state`, ignoring the per-post value written below
            comment_since_time = post_stat.get("since_timestamp", since_timestamp)
            comment_last_since_time = comment_since_time

            comments, comment_summary = api.page.get_comments(
                object_id=post_id,
                filter_type="stream",
                order_type="reverse_chronological",
            )
            self.log_object("Comments: ", str(comments))
            self.log_object("Comment Summary: ", str(comment_summary))

            for comment in comments:
                comment_created_time = convert_datetime_str_to_epoch(
                    comment.created_time
                )
                if comment_created_time is None:
                    # The original compared None < int and crashed here
                    logger.warning("Unable to parse comment creation time: %s", comment.created_time)
                    continue

                # Comments are reverse-chronological; the rest are stale
                if comment_created_time < comment_since_time:
                    break

                if (
                    comment_last_since_time is None
                    or comment_last_since_time < comment_created_time
                ):
                    comment_last_since_time = comment_created_time

                source_responses.append(
                    TextPayload(
                        processed_text=comment.message,
                        meta=vars(comment),
                        source_name=self.NAME,
                    )
                )

            post_stat["since_timestamp"] = comment_last_since_time

        state["since_timestamp"] = post_last_since_time

        # TODO: Augment payloads with post attachment title/description data

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses

    @staticmethod
    def log_object(message: str, result: Any) -> None:
        """Debug-log *message* followed by the JSON representation of *result*."""
        logger.debug(message + str(obj_to_json(result)))
diff --git a/obsei_module/obsei-master/obsei/source/google_maps_reviews.py b/obsei_module/obsei-master/obsei/source/google_maps_reviews.py
new file mode 100644
index 0000000000000000000000000000000000000000..9054e74105a27183eb0e447424b96d07fc188b70
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/google_maps_reviews.py
@@ -0,0 +1,124 @@
+import logging
+from datetime import datetime
+from typing import Optional, List, Any, Dict
+
+import requests
+from pydantic import SecretStr, Field
+
+from obsei.misc.utils import convert_utc_time, DATETIME_STRING_PATTERN
+from obsei.payload import TextPayload
+from obsei.source.base_source import BaseSourceConfig, BaseSource
+
+logger = logging.getLogger(__name__)
+OUTSCRAPPER_API_URL = 'https://api.app.outscraper.com'
+
+
class OSGoogleMapsReviewsConfig(BaseSourceConfig):
    """Configuration for fetching Google Maps reviews via the Outscraper API.

    ``queries`` are place queries; time bounds come from ``since_timestamp`` /
    ``until_timestamp`` or, failing that, ``lookup_period``.
    """

    NAME: str = "Maps Reviews Scrapper"
    queries: List[str]
    sort: str = "newest"
    ignore_empty_reviews: bool = True
    language: str = "en"
    since_timestamp: Optional[int] = None
    until_timestamp: Optional[int] = None
    lookup_period: Optional[str] = None
    number_of_reviews: int = 10
    number_of_places_per_query: int = 1
    country: Optional[str] = None
    # Response fields to request from the API (comma-joined at request time)
    filtered_fields: List[str] = Field(['reviews_data'])
    # parameter defines the coordinates of the location where you want your query to be applied.
    # It has to be constructed in the next sequence: "@" + "latitude" + "," + "longitude" + "," + "zoom"
    # (e.g. "@41.3954381,2.1628662,15.1z").
    central_coordinates: Optional[str] = None
    # Get API key from https://outscraper.com/
    api_key: Optional[SecretStr] = Field(None, env="outscrapper_api_key")

    def __init__(self, **values: Any):
        super().__init__(**values)

        if self.api_key is None:
            # Fixed the original's ungrammatical message ("key require to fetch")
            raise ValueError("Outscraper API key is required to fetch reviews data")
+
+
class OSGoogleMapsReviewsSource(BaseSource):
    """Source that pulls Google Maps reviews through the Outscraper REST API."""

    NAME: str = "Maps Reviews Scrapper"

    def lookup(self, config: OSGoogleMapsReviewsConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch reviews newer than the stored checkpoint for each query.

        :param config: queries, limits and API key.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per review.
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state when a state store is configured.
        # BUG FIX: the original tested the *builtin* `id` (always truthy)
        # instead of `identifier`, so a missing workflow id still hit the store.
        identifier: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )

        update_state: bool = True if identifier else False
        state = state or dict()

        since_timestamp: Optional[int] = state.get("since_timestamp", None)
        since_timestamp = since_timestamp or config.since_timestamp
        if since_timestamp is None and config.lookup_period is not None:
            # Short values are relative periods; longer values absolute datetimes
            if len(config.lookup_period) <= 5:
                since_time = convert_utc_time(config.lookup_period)
            else:
                since_time = datetime.strptime(config.lookup_period, DATETIME_STRING_PATTERN)

            since_timestamp = int(since_time.timestamp())

        last_reviews_since_time = since_timestamp

        params: Dict[str, Any] = {
            'query': config.queries,
            'reviewsLimit': config.number_of_reviews,
            'limit': config.number_of_places_per_query,
            'sort': config.sort,
            # Reviews are sorted from latest to oldest in case cutoff or start is passed
            # cutoff is oldest timestamp till reviews are needed
            'cutoff': since_timestamp,
            # start is newest timestamp from reviews are needed
            'start': config.until_timestamp,
            'ignoreEmpty': config.ignore_empty_reviews,
            'coordinates': config.central_coordinates,
            'language': config.language,
            'region': config.country,
            'fields': ",".join(config.filtered_fields),
            'async': False,
        }

        # For API doc refer https://app.outscraper.com/api-docs#tag/Google-Reviews
        response = requests.get(
            f'{OUTSCRAPPER_API_URL}/maps/reviews-v3',
            params=params,
            headers={
                'X-API-KEY': "" if config.api_key is None else config.api_key.get_secret_value(),
            },
            timeout=60,  # avoid hanging forever on a stalled connection
        )

        queries_data = []
        if response.status_code == 200:
            queries_data = response.json().get('data', [])
        else:
            logger.warning(f"API call failed with error: {response.json()}")

        for query_data in queries_data:
            reviews = [] if "reviews_data" not in query_data else query_data.pop("reviews_data")

            for review in reviews:
                source_responses.append(
                    TextPayload(
                        processed_text=review["review_text"],
                        meta={**review, **query_data},
                        source_name=self.NAME,
                    )
                )
                review_time = review["review_timestamp"]

                # Track the newest review seen for the next checkpoint
                if last_reviews_since_time is None or last_reviews_since_time < review_time:
                    last_reviews_since_time = review_time

        state["since_timestamp"] = last_reviews_since_time
        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/google_news_source.py b/obsei_module/obsei-master/obsei/source/google_news_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1fcf985fdc4b3704f69269274fcd9b319390394
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/google_news_source.py
@@ -0,0 +1,160 @@
+from typing import Any, Dict, List, Optional
+from urllib import parse
+
+import dateparser
+from GoogleNews import GoogleNews
+from pydantic import PrivateAttr
+from datetime import datetime, date, timedelta, time, timezone
+
+from obsei.payload import TextPayload
+from obsei.misc.utils import DATETIME_STRING_PATTERN, convert_utc_time, DEFAULT_LOOKUP_PERIOD
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.source.website_crawler_source import (
+ BaseCrawlerConfig,
+ TrafilaturaCrawlerConfig,
+)
+
+GOOGLE_DATE_TIME_QUERY_PATTERN = "%Y-%m-%d"
+
+
class GoogleNewsConfig(BaseSourceConfig):
    """Configuration for fetching articles from Google News.

    The fetch window is [``after_date``, ``before_date``]; ``lookup_period``
    is a relative alternative to ``after_date`` (mutually exclusive).
    """

    # PrivateAttr keeps the client object out of serialized API responses
    _google_news_client: GoogleNews = PrivateAttr()
    TYPE: str = "GoogleNews"
    query: str
    country: Optional[str] = "US"
    language: Optional[str] = "en"
    max_results: Optional[int] = 100
    lookup_period: Optional[str] = None
    # Window start (oldest articles) — the original comment said "latest", but
    # GoogleNewsSource uses after_date as the start time of the window
    after_date: Optional[str] = None
    # Window end (newest articles)
    before_date: Optional[str] = None
    fetch_article: Optional[bool] = False
    crawler_config: Optional[BaseCrawlerConfig] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        if self.lookup_period and self.after_date:
            raise AttributeError("Can't use `lookup_period` and `after_date` both")
        elif self.before_date and not self.after_date and not self.lookup_period:
            # BUG FIX: the original raised here even when `lookup_period` was
            # supplied, contradicting its own error message
            raise AttributeError("Can't use `before_date` without `after_date` or `lookup_period`")

        if self.lookup_period:
            after_time = convert_utc_time(self.lookup_period)
            self.after_date = after_time.strftime(GOOGLE_DATE_TIME_QUERY_PATTERN)

        if not self.before_date:
            # Default the window end to tomorrow (UTC midnight + 1 day)
            before_time = datetime.combine(date.today(), time(tzinfo=timezone.utc)) + timedelta(days=1)
            self.before_date = before_time.strftime(GOOGLE_DATE_TIME_QUERY_PATTERN)

        self._google_news_client = GoogleNews(
            lang=self.language,
            region=self.country
        )

        if not self.crawler_config:
            self.crawler_config = TrafilaturaCrawlerConfig(urls=[])

    def get_client(self) -> GoogleNews:
        """Return the configured GoogleNews client."""
        return self._google_news_client
+
+
class GoogleNewsSource(BaseSource):
    """Source that walks Google News day-by-day backwards over a date window."""

    NAME: Optional[str] = "GoogleNews"

    def lookup(self, config: GoogleNewsConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch articles inside the configured window, newest day first.

        :param config: query, window and optional crawler settings.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per article (title plus optional crawled text).
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state when a state store is configured
        id: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if id is None or self.store is None
            else self.store.get_source_state(id)
        )
        update_state: bool = True if id else False
        state = state or dict()
        lookup_period: str = state.get("since_time", None) or DEFAULT_LOOKUP_PERIOD
        since_time: datetime = convert_utc_time(lookup_period)
        last_since_time = since_time

        today_start_of_day: datetime = datetime.combine(date.today(), time(tzinfo=timezone.utc))
        today_end_of_day: datetime = today_start_of_day + timedelta(days=1)

        # Window start: explicit after_date, else today (UTC midnight)
        last_after_time: datetime
        if config.after_date:
            last_after_time = convert_utc_time(config.after_date)
        else:
            last_after_time = today_start_of_day

        if state.get("since_time", None) is not None:
            # Resume from the later of checkpoint and configured start.
            # BUG FIX: the original conditional yielded `since_time` in both
            # branches (its else returned `last_since_time`, which equals
            # `since_time`), wrongly rewinding the window start.
            last_after_time = max(last_after_time, since_time)

        # Window end: explicit before_date (only honoured with after_date), else tomorrow
        before_time: datetime
        if config.before_date and config.after_date:
            before_time = convert_utc_time(config.before_date)
        else:
            before_time = today_end_of_day

        if before_time > today_start_of_day:
            before_time = today_end_of_day

        google_news_client = config.get_client()
        more_data_exist = True
        while more_data_exist and before_time > last_after_time:
            # Query one day at a time, stepping backwards from the window end
            after_time = before_time - timedelta(days=1)
            after_date = after_time.strftime(GOOGLE_DATE_TIME_QUERY_PATTERN)
            before_date = before_time.strftime(GOOGLE_DATE_TIME_QUERY_PATTERN)

            new_query = f'{config.query}+after:{after_date}+before:{before_date}'

            before_time = after_time

            google_news_client.get_news(new_query)
            articles = google_news_client.results(sort=True)

            for article in articles:
                published_date = (
                    None
                    if article["datetime"] is None
                    else article["datetime"].replace(tzinfo=timezone.utc)
                )

                article_text: str = ""
                if config.fetch_article and config.crawler_config:
                    # Optionally crawl the article page for its full text
                    extracted_data = config.crawler_config.extract_url(url=article["link"])

                    if extracted_data.get("text", None) is not None:
                        article_text = extracted_data["text"]
                        del extracted_data["text"]

                    article["extracted_data"] = extracted_data

                source_responses.append(
                    TextPayload(
                        processed_text=f"{article['title']}.\n\n {article_text}",
                        meta=vars(article) if hasattr(article, "__dict__") else article,
                        source_name=self.NAME,
                    )
                )

                if config.max_results is not None and len(source_responses) >= config.max_results:
                    source_responses = source_responses[:config.max_results]
                    more_data_exist = False
                    break

                if published_date and since_time and published_date < since_time:
                    # Older than the checkpoint: stop paging further back
                    more_data_exist = False
                    break
                if last_since_time is None or (
                    published_date and last_since_time < published_date
                ):
                    last_since_time = published_date

        if update_state and last_since_time and self.store is not None:
            state["since_time"] = last_since_time.strftime(DATETIME_STRING_PATTERN)
            self.store.update_source_state(workflow_id=id, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/pandas_source.py b/obsei_module/obsei-master/obsei/source/pandas_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..537ecc25ba4b680d0e4146b85ff2ba937d33e833
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/pandas_source.py
@@ -0,0 +1,54 @@
+from typing import List, Optional, Any
+
+from pandas import DataFrame
+
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.payload import TextPayload
+
+
class PandasSourceConfig(BaseSourceConfig):
    """Configuration for reading text rows from a pandas DataFrame.

    ``text_columns`` are joined with ``separator`` to form the payload text;
    ``include_columns`` restricts the meta dict (None keeps the whole row).
    """

    TYPE: str = "Pandas"

    dataframe: DataFrame
    text_columns: List[str]
    separator: str = " "
    include_columns: Optional[List[str]] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        if len(self.text_columns) == 0:
            raise ValueError("`text_columns` cannot be empty")

        # all() accepts a generator directly; no need to build a list first
        if not all(
            text_column in self.dataframe.columns for text_column in self.text_columns
        ):
            raise ValueError("Every `text_columns` should be present in `dataframe`")

        try:
            self.dataframe[self.text_columns] = self.dataframe[
                self.text_columns
            ].astype("string")
        except TypeError as e:
            # Chain the original error so the root cause stays visible (B904)
            raise ValueError("Unable to convert `text_columns` to string dtype") from e
+
+
class PandasSource(BaseSource):
    """Source that turns each row of the configured DataFrame into a TextPayload."""

    NAME: Optional[str] = "Pandas"

    def lookup(self, config: PandasSourceConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Convert every DataFrame row into one payload.

        Text columns are concatenated with the configured separator; the meta
        dict is either the full row or only the `include_columns` subset.
        """
        payloads: List[TextPayload] = []
        for row in config.dataframe.to_dict("records"):
            text = config.separator.join(
                row.get(column) for column in config.text_columns
            )
            if config.include_columns is None:
                meta = row
            else:
                meta = {name: row[name] for name in config.include_columns}
            payloads.append(
                TextPayload(processed_text=text, meta=meta, source_name=self.NAME)
            )

        return payloads
diff --git a/obsei_module/obsei-master/obsei/source/playstore_reviews.py b/obsei_module/obsei-master/obsei/source/playstore_reviews.py
new file mode 100644
index 0000000000000000000000000000000000000000..99ab15497f2837f62aa6e03f96903b393ff13aa6
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/playstore_reviews.py
@@ -0,0 +1,128 @@
+from typing import Any, Dict, List, Optional
+
+from google.auth.credentials import Credentials
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+from pydantic import Field, SecretStr, PrivateAttr
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
+
class GoogleCredInfo(BaseSettings):
    """Google service-account credential settings for the Play Store source.

    Currently only service_account_file type credential supported.
    Refer: https://developers.google.com/identity/protocols/oauth2/service-account
    """

    # Path to the service-account JSON key file
    service_cred_file: Optional[str] = Field(None, env="google_service_cred_file")
    # Developer (API) key passed to the Google API client
    developer_key: Optional[SecretStr] = Field(None, env="google_developer_key")
    # OAuth scopes requested for the androidpublisher API
    scopes: List[str] = ["https://www.googleapis.com/auth/androidpublisher"]
+
+
class PlayStoreConfig(BaseSourceConfig):
    """Configuration for the official Play Store reviews source.

    Builds Google service-account credentials at init; optionally narrows them
    to a quota project and/or an impersonated subject.
    """

    # PrivateAttr keeps credentials out of serialized API responses
    _credentials: Credentials = PrivateAttr()
    TYPE: str = "PlayStore"
    # Android application package to fetch reviews for
    package_name: str
    # Pagination controls for the reviews.list API
    start_index: Optional[int] = None
    max_results: int = 10
    num_retries: int = 1
    # Optional credential refinements (quota project / domain-wide delegation)
    with_quota_project_id: Optional[str] = None
    with_subject: Optional[str] = None
    cred_info: Optional[GoogleCredInfo] = Field(None)

    def __init__(self, **values: Any):
        super().__init__(**values)

        # Fall back to environment-backed credentials when none are supplied
        self.cred_info = self.cred_info or GoogleCredInfo()

        if self.cred_info.service_cred_file is None or self.cred_info.developer_key is None:
            raise ValueError("`service_cred_file` and `developer_key` can't be empty")

        self._credentials = service_account.Credentials.from_service_account_file(
            filename=self.cred_info.service_cred_file, scopes=self.cred_info.scopes
        )

        if self.with_quota_project_id is not None:
            self._credentials = self._credentials.with_quota_project(self.with_quota_project_id)

        if self.with_subject is not None:
            self._credentials = self._credentials.with_subject(self.with_subject)

    def get_google_credentials(self) -> Credentials:
        """Return the (possibly refined) service-account credentials."""
        return self._credentials

    def get_developer_key(self) -> str:
        """Return the plain developer key.

        :raises ValueError: when credentials/key were never configured.
        """
        if self.cred_info is None or self.cred_info.developer_key is None:
            raise ValueError("`developer_key` can't be empty")
        return self.cred_info.developer_key.get_secret_value()
+
+
class PlayStoreSource(BaseSource):
    """Source that fetches app reviews via the official Google Play Developer API."""

    NAME: str = "PlayStore"

    def lookup(self, config: PlayStoreConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch all available reviews for the configured package, paging through results.

        :param config: package, credentials and pagination settings.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per review comment.
        """
        source_responses: List[TextPayload] = []
        # Refer https://github.com/googleapis/google-api-python-client/blob/master/docs/start.md
        with build(
            serviceName="androidpublisher",
            version="v3",
            credentials=config.get_google_credentials(),
            developerKey=config.get_developer_key(),
        ) as service:
            # Use a dedicated name for the API resource: the original rebound
            # `reviews` to the response payload, clobbering the resource needed
            # for the next pagination call
            reviews_resource = service.reviews()
            pagination_token: Optional[str] = None

            # Restore per-workflow state when a state store is configured
            id: str = kwargs.get("id", None)
            state: Optional[Dict[str, Any]] = (
                None
                if id is None or self.store is None
                else self.store.get_source_state(id)
            )
            start_index: Optional[int] = (
                config.start_index or None
                if state is None
                else state.get("start_index", None)
            )
            update_state: bool = True if id else False
            state = state or dict()
            review_id = start_index

            while True:
                # https://googleapis.github.io/google-api-python-client/docs/dyn/androidpublisher_v3.reviews.html#list
                # NOTE(review): the discovery client expects camelCase params
                # (packageName/maxResults/startIndex) and the request must be
                # executed; the original passed snake_case kwargs and iterated
                # the un-executed HttpRequest object — confirm against the
                # androidpublisher v3 reference
                responses = reviews_resource.list(
                    packageName=config.package_name,
                    maxResults=config.max_results,
                    startIndex=start_index,
                    token=pagination_token,
                ).execute(num_retries=config.num_retries)

                # BUG FIX: the original read responses["responses"]; the review
                # list lives under the "reviews" key
                for review in responses.get("reviews", []):
                    if "comments" not in review:
                        continue

                    review_id = review["reviewId"]

                    # Currently only one user comment is supported
                    text = review["comments"][0]["userComment"]["text"]
                    source_responses.append(
                        TextPayload(
                            processed_text=text, meta=review, source_name=self.NAME
                        )
                    )

                pagination_token = responses.get("tokenPagination", {}).get(
                    "nextPageToken"
                )

                if pagination_token is None:
                    break

            if update_state and self.store is not None:
                state["start_index"] = review_id
                self.store.update_source_state(workflow_id=id, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/playstore_scrapper.py b/obsei_module/obsei-master/obsei/source/playstore_scrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7e3472e18c38ba8380c766bc46774effd8f97bc
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/playstore_scrapper.py
@@ -0,0 +1,168 @@
+import logging
+import re
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Tuple
+from urllib import parse
+
+from google_play_scraper import Sort, reviews
+
+from obsei.misc.web_search import perform_search
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.payload import TextPayload
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+)
+
+logger = logging.getLogger(__name__)
+
+
class PlayStoreScrapperConfig(BaseSourceConfig):
    """Configuration for scraping Play Store reviews (google_play_scraper based).

    The package is resolved in priority order: explicit ``app_url`` >
    ``package_name`` > web search by ``app_name``.
    """

    TYPE: str = "PlayStoreScrapper"
    app_url: Optional[str] = None
    countries: Optional[List[str]] = None
    package_name: Optional[str] = None
    app_name: Optional[str] = None
    language: Optional[str] = None
    filter_score_with: Optional[int] = None
    lookup_period: Optional[str] = None
    max_count: Optional[int] = 200

    def __init__(self, **data: Any):
        super().__init__(**data)

        if self.app_url is not None:
            self.package_name, self.countries, self.language = PlayStoreScrapperConfig.parse_app_url(self.app_url)
        else:
            if not self.package_name and self.app_name:
                self.package_name = PlayStoreScrapperConfig.search_package_name(
                    self.app_name
                )

        if not self.package_name:
            raise ValueError("Valid `package_name`, `app_name` or `app_url` is mandatory")

        # Defaults applied after resolution
        self.language = self.language or "en"
        self.countries = self.countries or ["us"]
        self.app_name = self.app_name or self.package_name

    @classmethod
    def parse_app_url(cls, app_url: str) -> Tuple[Optional[str], Optional[List[str]], Optional[str]]:
        """Extract (package_name, countries, language) from a Play Store URL.

        Reads the ``id``, ``gl`` and ``hl`` query parameters; missing values
        come back as None.
        """
        parsed_url = parse.urlparse(app_url)
        query_dict = parse.parse_qs(parsed_url.query)
        countries = query_dict.get('gl', None)

        languages = query_dict.get('hl', None)
        language = languages[0] if languages else None

        package_ids = query_dict.get('id', None)
        package_name = package_ids[0] if package_ids else None

        return package_name, countries, language

    @classmethod
    def search_package_name(cls, app_name: str) -> str:
        """Find an app's package id by web-searching for its Play Store page.

        :raises RuntimeError: when no Play Store details URL appears in the results.
        """
        base_request_url = "https://play.google.com"  # plain string; was a pointless f-string
        search_response = perform_search(
            request_url=base_request_url, query=f"play store {app_name}"
        )

        # BUG FIX: escape the dots and accept the full package-id alphabet
        # (letters of either case, digits, underscores, dots); the original
        # lowercase-only pattern truncated mixed-case package ids
        pattern = r"play\.google\.com/store/apps/details.+?id=([\w.]+)"
        match_object = re.search(pattern, search_response.text)
        if match_object:
            return str(match_object.group(1))
        raise RuntimeError("Pattern matching is not found")
+
+
class PlayStoreScrapperSource(BaseSource):
    """Source that scrapes Play Store reviews per configured country."""

    NAME: Optional[str] = "PlayStoreScrapper"

    def lookup(  # type: ignore[override]
        self, config: PlayStoreScrapperConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Fetch reviews newer than each country's stored checkpoint.

        :param config: package, countries and scraping parameters.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per review.
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state when a state store is configured
        id: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if id is None or self.store is None
            else self.store.get_source_state(id)
        )
        update_state: bool = True if id else False
        state = state or dict()

        if config.countries is None or len(config.countries) == 0:
            logger.warning("`countries` in config should not be empty or None")
            return source_responses

        for country in config.countries:
            country_stat: Dict[str, Any] = state.get(country, dict())
            # BUG FIX: attach the per-country checkpoint to the state dict —
            # the original left this commented out, so `since_time` written
            # below was never persisted
            state[country] = country_stat

            lookup_period: str = country_stat.get("since_time", config.lookup_period)
            lookup_period = lookup_period or DEFAULT_LOOKUP_PERIOD
            # Short values are relative periods; longer values absolute datetimes
            if len(lookup_period) <= 5:
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

            last_since_time: datetime = since_time

            continuation_token = None
            stale_found = False
            while True:
                store_reviews, continuation_token = reviews(
                    app_id=config.package_name,
                    lang=config.language,
                    country=country,
                    sort=Sort.NEWEST,
                    filter_score_with=config.filter_score_with,
                    continuation_token=continuation_token,
                    count=config.max_count,
                )
                store_reviews = store_reviews or []

                for review in store_reviews:
                    review_time = review["at"].replace(tzinfo=timezone.utc)

                    # Reviews are newest-first: the first stale one ends the
                    # scan. BUG FIX: check BEFORE appending — the original
                    # emitted the stale review and then broke
                    if since_time > review_time:
                        stale_found = True
                        break

                    source_responses.append(
                        TextPayload(
                            processed_text=review["content"],
                            meta=review,
                            source_name=self.NAME,
                        )
                    )

                    if last_since_time is None or last_since_time < review_time:
                        last_since_time = review_time

                # Stop paging once stale data was reached (the original kept
                # fetching older pages), the token is exhausted, or enough
                # responses were collected
                if (
                    stale_found
                    or continuation_token is None
                    or continuation_token.token is None
                    or continuation_token.count <= len(source_responses)
                ):
                    break

            country_stat["since_time"] = last_since_time.strftime(
                DATETIME_STRING_PATTERN
            )

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=id, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/reddit_scrapper.py b/obsei_module/obsei-master/obsei/source/reddit_scrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca79601f1ba86a00fcd3904bcb506eb2fb331e38
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/reddit_scrapper.py
@@ -0,0 +1,108 @@
+import logging
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+import mmh3
+from pydantic import PrivateAttr
+from reddit_rss_reader.reader import RedditContent, RedditRSSReader
+
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.payload import TextPayload
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+)
+
+logger = logging.getLogger(__name__)
+
+
class RedditScrapperConfig(BaseSourceConfig):
    """Configuration for scraping a Reddit RSS feed.

    A stable ``url_id`` (32-bit murmur hash of the URL) identifies the feed in
    persisted state when the caller does not provide one.
    """

    # PrivateAttr keeps the reader client out of serialized API responses
    _scrapper: RedditRSSReader = PrivateAttr()
    TYPE: str = "RedditScrapper"
    url: str
    url_id: Optional[str] = None
    user_agent: Optional[str] = None
    lookup_period: Optional[str] = None

    def __init__(self, **data: Any):
        super().__init__(**data)

        # Derive a stable identifier from the URL when none was supplied
        if not self.url_id:
            self.url_id = "{:02x}".format(mmh3.hash(self.url, signed=False))

        agent = self.user_agent or "script {url_hash}".format(url_hash=self.url_id)
        self._scrapper = RedditRSSReader(url=self.url, user_agent=agent)

    def get_readers(self) -> RedditRSSReader:
        """Return the underlying RSS reader client."""
        return self._scrapper
+
+
class RedditScrapperSource(BaseSource):
    """Source that reads Reddit entries from an RSS feed via RedditRSSReader."""

    NAME: Optional[str] = "RedditScrapper"

    def lookup(self, config: RedditScrapperConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch feed entries newer than the stored checkpoint.

        :param config: feed URL and reader settings.
        :param kwargs: optional ``id`` workflow identifier used to load/save state.
        :return: one TextPayload per feed entry (title + extracted text).
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state when a state store is configured
        identifier: str = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = True if identifier else False
        state = state or dict()

        # Per-feed checkpoint keyed by the URL hash
        scrapper_stat: Dict[str, Any] = (
            dict() if not config.url_id else state.get(config.url_id, dict())
        )
        lookup_period: str = scrapper_stat.get("since_time", config.lookup_period)
        lookup_period = lookup_period or DEFAULT_LOOKUP_PERIOD
        since_time = convert_utc_time(lookup_period)

        last_since_time: datetime = since_time

        since_id: Optional[str] = scrapper_stat.get("since_id", None)
        last_index = since_id
        if config.url_id:
            state[config.url_id] = scrapper_stat

        reddit_data: Optional[List[RedditContent]] = None
        try:
            reddit_data = config.get_readers().fetch_content(
                after=since_time, since_id=since_id
            )
        except RuntimeError as ex:
            # BUG FIX: log the exception itself; the original logged only
            # `ex.__cause__`, which is None unless the error was explicitly chained
            logger.warning("Unable to fetch Reddit content: %s", ex)

        reddit_data = reddit_data or []

        for reddit in reddit_data:
            source_responses.append(
                TextPayload(
                    processed_text=f"{reddit.title}. {reddit.extracted_text}",
                    meta=reddit.__dict__,
                    source_name=self.NAME,
                )
            )

            comment_time = reddit.updated.replace(tzinfo=timezone.utc)

            if last_since_time is None or last_since_time < comment_time:
                last_since_time = comment_time
            if last_index is None:
                # Entries are assumed newest-first, so the first id is the newest
                last_index = reddit.id

        scrapper_stat["since_time"] = last_since_time.strftime(DATETIME_STRING_PATTERN)
        scrapper_stat["since_id"] = last_index

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/reddit_source.py b/obsei_module/obsei-master/obsei/source/reddit_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..67a0603c4ab9bdfd20e95472f296d5a7d0b51ea7
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/reddit_source.py
@@ -0,0 +1,150 @@
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from praw import Reddit
+from pydantic import Field, PrivateAttr, SecretStr
+from pydantic_settings import BaseSettings
+
+from obsei.payload import TextPayload
+from obsei.misc.utils import (
+ DATETIME_STRING_PATTERN,
+ DEFAULT_LOOKUP_PERIOD,
+ convert_utc_time,
+ text_from_html,
+)
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
+
class RedditCredInfo(BaseSettings):
    """Reddit API credentials; unset fields fall back to environment variables."""

    # Create credential at https://www.reddit.com/prefs/apps
    # Also refer https://praw.readthedocs.io/en/latest/getting_started/authentication.html
    # Currently Password Flow, Read Only Mode and Saved Refresh Token Mode are supported
    # NOTE(review): client_id/client_secret default to None although annotated as
    # non-Optional SecretStr — confirm pydantic-settings accepts this at runtime.
    client_id: SecretStr = Field(None, env="reddit_client_id")
    client_secret: SecretStr = Field(None, env="reddit_client_secret")
    user_agent: str = "Test User Agent"
    redirect_uri: Optional[str] = None
    refresh_token: Optional[SecretStr] = Field(None, env="reddit_refresh_token")
    username: Optional[str] = Field(None, env="reddit_username")
    password: Optional[SecretStr] = Field(None, env="reddit_password")
    # When True the praw client is put into read-only mode (no write actions).
    read_only: bool = True
+
+
class RedditConfig(BaseSourceConfig):
    """Configuration for the Reddit source; constructs a shared praw client on init."""

    # This is done to avoid exposing member to API response
    _reddit_client: Reddit = PrivateAttr()
    TYPE: str = "Reddit"
    # Subreddit names; joined with '+' for a praw multi-subreddit query.
    subreddits: List[str]
    # Optional whitelist of submission ids; other posts are skipped.
    post_ids: Optional[List[str]] = None
    lookup_period: Optional[str] = None
    include_post_meta: Optional[bool] = True
    post_meta_field: str = "post_meta"
    cred_info: Optional[RedditCredInfo] = Field(None)

    def __init__(self, **data: Any):
        """Validate the model, then build the praw Reddit client from cred_info."""
        super().__init__(**data)

        # Fall back to environment-provided credentials when none were passed.
        self.cred_info = self.cred_info or RedditCredInfo()

        # NOTE(review): client_id/client_secret may be None when no credentials
        # are configured; get_secret_value() would then raise — verify upstream.
        self._reddit_client = Reddit(
            client_id=self.cred_info.client_id.get_secret_value(),
            client_secret=self.cred_info.client_secret.get_secret_value(),
            redirect_uri=self.cred_info.redirect_uri,
            user_agent=self.cred_info.user_agent,
            refresh_token=self.cred_info.refresh_token.get_secret_value()
            if self.cred_info.refresh_token
            else None,
            username=self.cred_info.username if self.cred_info.username else None,
            password=self.cred_info.password.get_secret_value()
            if self.cred_info.password
            else None,
        )

        # Honour read-only mode as configured.
        self._reddit_client.read_only = self.cred_info.read_only

    def get_reddit_client(self) -> Reddit:
        """Return the praw client built during initialisation."""
        return self._reddit_client
+
+
class RedditSource(BaseSource):
    """Source that fetches new top-level comments from the configured subreddits."""

    NAME: str = "Reddit"

    def lookup(self, config: RedditConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch comments newer than the stored checkpoint.

        When a workflow ``id`` is passed via kwargs and a store is attached,
        the per-post "since" time and comment id are read from and written
        back to the store, so repeated calls only return new comments.
        """
        source_responses: List[TextPayload] = []

        # Restore per-workflow state, if a store and workflow id are available.
        # Renamed local from `id` to avoid shadowing the builtin.
        identifier: Optional[str] = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = bool(identifier)
        state = state or dict()

        subreddit_reference = config.get_reddit_client().subreddit(
            "+".join(config.subreddits)
        )
        post_stream = subreddit_reference.stream.submissions(pause_after=-1)
        for post in post_stream:
            # With pause_after=-1 praw yields None once the stream is drained.
            if post is None:
                break

            post_data = vars(post)
            post_id = post_data["id"]
            # Idiomatic membership test instead of `__contains__`.
            if config.post_ids and post_id not in config.post_ids:
                continue

            # Per-post checkpoint: last seen comment time and comment id.
            post_stat: Dict[str, Any] = state.get(post_id, dict())
            lookup_period: str = post_stat.get("since_time", config.lookup_period)
            lookup_period = lookup_period or DEFAULT_LOOKUP_PERIOD
            if len(lookup_period) <= 5:
                # Short values like "1d"/"12h" are relative periods.
                since_time = convert_utc_time(lookup_period)
            else:
                since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

            last_since_time: datetime = since_time

            since_id: Optional[str] = post_stat.get("since_comment_id", None)
            last_index = since_id
            state[post_id] = post_stat

            post.comment_sort = "new"
            post.comments.replace_more(limit=None)

            # top_level_comments only; sorted newest-first per comment_sort above
            first_comment = True
            for comment in post.comments:
                comment_data = vars(comment)
                if config.include_post_meta:
                    comment_data[config.post_meta_field] = post_data

                # NOTE(review): utcfromtimestamp yields a naive datetime; this
                # assumes since_time is naive UTC as well — confirm the
                # convert_utc_time contract.
                comment_time = datetime.utcfromtimestamp(
                    int(comment_data["created_utc"])
                )
                comment_id = comment_data["id"]

                # Stop once we reach comments already processed last run.
                if comment_time < since_time:
                    break
                if last_index and last_index == comment_id:
                    break
                if last_since_time is None or last_since_time < comment_time:
                    last_since_time = comment_time
                # The first (newest) comment becomes the next checkpoint id.
                if last_index is None or first_comment:
                    last_index = comment_id
                first_comment = False

                text = "".join(text_from_html(comment_data["body_html"]))

                source_responses.append(
                    TextPayload(
                        processed_text=text, meta=comment_data, source_name=self.NAME
                    )
                )

            post_stat["since_time"] = last_since_time.strftime(DATETIME_STRING_PATTERN)
            post_stat["since_comment_id"] = last_index

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/source/test1.py b/obsei_module/obsei-master/obsei/source/test1.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f76dd482ba6f39dc7304fdf299cf3bbfc9c37eb
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/test1.py
@@ -0,0 +1,11 @@
from website_crawler_source import TrafilaturaCrawlerConfig
import json

if __name__ == "__main__":
    # Crawl a single news article and print the extracted text as JSON.
    article_url = "https://nld.com.vn/10-tro-ly-cap-cao-cua-tong-thong-han-quoc-dong-loat-tu-chuc-196241204104419743.htm"
    config = TrafilaturaCrawlerConfig(urls=[article_url])
    result = config.extract_url(article_url)

    # extract_url returns an empty dict when the fetch or extraction fails,
    # so guard the key lookup (the original raised KeyError on failure).
    if "text" in result:
        print(json.dumps(result["text"], indent=4, ensure_ascii=False))
    else:
        print("No text could be extracted from the URL")
\ No newline at end of file
diff --git a/obsei_module/obsei-master/obsei/source/twitter_source.py b/obsei_module/obsei-master/obsei/source/twitter_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..819a4f502c973f234759cd67acfe759e3e8f620e
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/twitter_source.py
@@ -0,0 +1,327 @@
+import logging
+from datetime import datetime
+
+import pytz
+import requests
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field
+from pydantic.types import SecretStr
+from pydantic_settings import BaseSettings
+from searchtweets import collect_results, gen_request_parameters
+
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+from obsei.payload import TextPayload
+
+from obsei.misc.utils import convert_utc_time
+
logger = logging.getLogger(__name__)

# OAuth2 endpoint used to exchange consumer key/secret for a bearer token.
TWITTER_OAUTH_ENDPOINT = "https://api.twitter.com/oauth2/token"

# Default number of tweets fetched per lookup when not configured.
DEFAULT_MAX_TWEETS = 10

# Tweet attributes requested from the v2 search API by default.
DEFAULT_TWEET_FIELDS = [
    "author_id",
    "conversation_id",
    "created_at",
    "entities",
    "geo",
    "id",
    "in_reply_to_user_id",
    "lang",
    "public_metrics",
    "referenced_tweets",
    "source",
    "text",
    "withheld",
]
# Expansion objects embedded alongside each tweet (authors, places, refs).
DEFAULT_EXPANSIONS = [
    "author_id",
    "entities.mentions.username",
    "geo.place_id",
    "in_reply_to_user_id",
    "referenced_tweets.id",
    "referenced_tweets.id.author_id",
]
# Place attributes returned for geo-tagged tweets.
DEFAULT_PLACE_FIELDS = [
    "contained_within",
    "country",
    "country_code",
    "full_name",
    "geo",
    "id",
    "name",
    "place_type",
]
# User attributes returned via the author_id expansion.
DEFAULT_USER_FIELDS = [
    "created_at",
    "description",
    "entities",
    "id",
    "location",
    "name",
    "public_metrics",
    "url",
    "username",
    "verified",
]
# Default query operators: exclude replies and retweets.
DEFAULT_OPERATORS = ["-is:reply", "-is:retweet"]
+
+
class TwitterCredentials(BaseSettings):
    """Twitter API credentials; unset fields fall back to environment variables."""

    bearer_token: SecretStr = Field("", env="twitter_bearer_token")
    consumer_key: SecretStr = Field("", env="twitter_consumer_key")
    consumer_secret: SecretStr = Field("", env="twitter_consumer_secret")
    # v2 recent-search endpoint by default; override via env for other endpoints.
    endpoint: str = Field(
        "https://api.twitter.com/2/tweets/search/recent", env="twitter_endpoint"
    )
    # Extra HTTP headers forwarded to the search request, if any.
    extra_headers_dict: Optional[Dict[str, Any]] = None
+
+
class TwitterSourceConfig(BaseSourceConfig):
    """Configuration for the Twitter search source.

    At lookup time at least one of `query`, `keywords`, `hashtags` or
    `usernames` must be non-empty; field lists default to the module
    constants defined above.
    """

    TYPE: str = "Twitter"
    query: Optional[str] = None
    keywords: Optional[List[str]] = None
    hashtags: Optional[List[str]] = None
    usernames: Optional[List[str]] = None
    operators: Optional[List[str]] = Field(DEFAULT_OPERATORS)
    since_id: Optional[int] = None
    until_id: Optional[int] = None
    lookup_period: Optional[str] = None
    tweet_fields: Optional[List[str]] = Field(DEFAULT_TWEET_FIELDS)
    user_fields: Optional[List[str]] = Field(DEFAULT_USER_FIELDS)
    expansions: Optional[List[str]] = Field(DEFAULT_EXPANSIONS)
    place_fields: Optional[List[str]] = Field(DEFAULT_PLACE_FIELDS)
    max_tweets: int = DEFAULT_MAX_TWEETS
    # NOTE(review): annotated non-Optional but defaults to None; __init__
    # replaces it with TwitterCredentials() — confirm pydantic tolerates this.
    cred_info: TwitterCredentials = Field(None)
    # Deprecated alias for cred_info; see the warning in __init__.
    credential: Optional[TwitterCredentials] = None

    def __init__(self, **data: Any):
        """Resolve credentials (honouring the deprecated `credential` field),
        fetch a bearer token when only consumer key/secret are given, and
        clamp `max_tweets` to the API maximum of 100 per call."""
        super().__init__(**data)

        self.cred_info = self.cred_info or TwitterCredentials()

        if self.credential is not None:
            logger.warning("`credential` is deprecated; use `cred_info`")
            self.cred_info = self.credential

        # No bearer token supplied: derive one from consumer key/secret.
        if self.cred_info.bearer_token.get_secret_value() == '':
            if self.cred_info.consumer_key.get_secret_value() == '' \
                    or self.cred_info.consumer_secret.get_secret_value() == '':
                raise AttributeError(
                    "consumer_key and consumer_secret required to generate bearer_token via Twitter"
                )

            self.cred_info.bearer_token = SecretStr(self.generate_bearer_token())

        if self.max_tweets > 100:
            logger.warning("Twitter API support max 100 tweets per call, hence resetting `max_tweets` to 100")
            self.max_tweets = 100

    def get_twitter_credentials(self) -> Dict[str, Any]:
        """Return the kwargs searchtweets expects for authentication,
        generating a bearer token on demand if it is still empty."""
        if self.cred_info.bearer_token.get_secret_value() == '':
            self.cred_info.bearer_token = SecretStr(self.generate_bearer_token())

        return {
            "bearer_token": self.cred_info.bearer_token.get_secret_value(),
            "endpoint": self.cred_info.endpoint,
            "extra_headers_dict": self.cred_info.extra_headers_dict,
        }

    # Copied from Twitter searchtweets-v2 lib
    def generate_bearer_token(self) -> str:
        """
        Return the bearer token for a given pair of consumer key and secret values.
        """
        data = [("grant_type", "client_credentials")]
        resp = requests.post(
            TWITTER_OAUTH_ENDPOINT,
            data=data,
            auth=(
                self.cred_info.consumer_key.get_secret_value(),
                self.cred_info.consumer_secret.get_secret_value(),
            ),
        )
        logger.warning("Grabbing bearer token from OAUTH")
        # Surface the API error body before raising for easier debugging.
        if resp.status_code >= 400:
            logger.error(resp.text)
            resp.raise_for_status()

        return str(resp.json()["access_token"])
+
+
class TwitterSource(BaseSource):
    """Source that fetches recent tweets matching a configured search query."""

    NAME: str = "Twitter"

    def lookup(self, config: TwitterSourceConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Search tweets and return one payload per tweet.

        When a workflow ``id`` is supplied via kwargs and a store is attached,
        the newest tweet id is checkpointed so subsequent calls only return
        newer tweets.

        Raises:
            AttributeError: if none of query/keywords/hashtags/usernames is set.
        """
        if (
            not config.query
            and not config.keywords
            and not config.hashtags
            and not config.usernames
        ):
            raise AttributeError(
                "At least one non empty parameter required (query, keywords, hashtags, and usernames)"
            )

        # The v2 search API expects comma-separated field lists.
        place_fields = (
            ",".join(config.place_fields) if config.place_fields is not None else None
        )
        user_fields = (
            ",".join(config.user_fields) if config.user_fields is not None else None
        )
        expansions = (
            ",".join(config.expansions) if config.expansions is not None else None
        )
        tweet_fields = (
            ",".join(config.tweet_fields) if config.tweet_fields is not None else None
        )

        # Restore checkpoint state for this workflow id, if any.
        identifier: Optional[str] = kwargs.get("id", None)
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        since_id: Optional[int] = (
            config.since_id or None if state is None else state.get("since_id", None)
        )
        until_id: Optional[int] = (
            config.until_id or None if state is None else state.get("until_id", None)
        )
        update_state: bool = bool(identifier)
        state = state or dict()
        max_tweet_id = since_id
        lookup_period = config.lookup_period
        if lookup_period is None:
            start_time = None
        elif len(lookup_period) <= 5:
            # Short values like "1d"/"12h" are relative periods.
            start_time = convert_utc_time(lookup_period).replace(tzinfo=pytz.UTC)
        else:
            start_time = datetime.strptime(lookup_period, "%Y-%m-%dT%H:%M:%S%z")

        # Tweet-id pagination takes precedence over a time window.
        if since_id or until_id:
            lookup_period = None

        query = self._generate_query_string(
            query=config.query,
            keywords=config.keywords,
            hashtags=config.hashtags,
            usernames=config.usernames,
            operators=config.operators,
        )

        source_responses: List[TextPayload] = []

        search_query = gen_request_parameters(
            granularity=None,
            query=query,
            results_per_call=config.max_tweets,
            place_fields=place_fields,
            expansions=expansions,
            user_fields=user_fields,
            tweet_fields=tweet_fields,
            since_id=since_id,
            until_id=until_id,
            start_time=lookup_period,
            stringify=False,
        )
        logger.info(search_query)

        tweets_output = collect_results(
            query=search_query,
            max_tweets=config.max_tweets,
            result_stream_args=config.get_twitter_credentials(),
        )

        tweets: List[Dict[str, Any]] = []
        users: List[Dict[str, Any]] = []
        meta_info: Dict[str, Any] = {}

        # BUGFIX: the original `not tweets_output and len(tweets_output) == 0`
        # raised TypeError when collect_results returned None; a plain falsy
        # check covers both None and an empty list.
        if not tweets_output:
            logger.info("No Tweets found")
        else:
            first_page = tweets_output[0]
            tweets = first_page.get("data", tweets)
            users = first_page.get("includes", {}).get("users", users)
            meta_info = first_page.get("meta", meta_info)

        # Map author_id -> user object so each tweet can embed its author.
        user_map: Dict[str, Dict[str, Any]] = {}
        if len(users) > 0 and "id" in users[0]:
            for user in users:
                if "username" in user:
                    user["user_url"] = f'https://twitter.com/{user["username"]}'
                user_map[user["id"]] = user

        logger.info(f"Twitter API meta_info='{meta_info}'")

        for tweet in tweets:
            if "author_id" in tweet and tweet["author_id"] in user_map:
                tweet["author_info"] = user_map.get(tweet["author_id"])

            source_responses.append(self._get_source_output(tweet))

            # Stop once a tweet falls before the requested window start.
            if start_time:
                created_date = datetime.strptime(
                    tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z"
                )
                if start_time > created_date:
                    break

        max_tweet_id = meta_info.get("newest_id", max_tweet_id)

        if update_state and self.store is not None:
            state["since_id"] = max_tweet_id
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses

    @staticmethod
    def _generate_query_string(
        query: Optional[str] = None,
        keywords: Optional[List[str]] = None,
        hashtags: Optional[List[str]] = None,
        usernames: Optional[List[str]] = None,
        operators: Optional[List[str]] = None,
    ) -> str:
        """Build the search string: OR-groups of terms plus ANDed operators.

        A non-empty `query` wins outright; otherwise each non-empty list of
        keywords/hashtags/usernames becomes one parenthesised OR group, and
        operators (e.g. "-is:reply") are appended as a trailing group.
        """
        if query:
            return query

        # The original's `else` branches were unreachable (an empty list is
        # falsy), so only the parenthesised form is kept.
        or_tokens = [
            f'({" OR ".join(tokens)})'
            for tokens in (keywords, hashtags, usernames)
            if tokens
        ]
        or_query_str = " OR ".join(or_tokens)

        and_query_str = f' ({" ".join(operators)})' if operators else ""

        return or_query_str + and_query_str

    def _get_source_output(self, tweet: Dict[str, Any]) -> TextPayload:
        """Wrap a raw tweet dict into a TextPayload, adding its public URL."""
        tweet["tweet_url"] = f'https://twitter.com/twitter/statuses/{tweet["id"]}'
        return TextPayload(
            processed_text=tweet["text"], meta=tweet, source_name=self.NAME
        )
diff --git a/obsei_module/obsei-master/obsei/source/website_crawler_source.py b/obsei_module/obsei-master/obsei/source/website_crawler_source.py
new file mode 100644
index 0000000000000000000000000000000000000000..f70b6241f6e3df506f8de420e262e6c10e3852e4
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/website_crawler_source.py
@@ -0,0 +1,144 @@
+import json
+import logging
+from abc import abstractmethod
+from typing import List, Optional, Dict, Any
+
+import mmh3
+
+from obsei.payload import TextPayload
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
+logger = logging.getLogger(__name__)
+
+
class BaseCrawlerConfig(BaseSourceConfig):
    """Abstract crawler config: extract one URL, discover more crawlable URLs."""

    TYPE: str = "BaseCrawler"

    @abstractmethod
    def extract_url(self, url: str, url_id: Optional[str] = None) -> Dict[str, Any]:
        """Fetch `url` and return its extracted content as a dict."""
        pass

    @abstractmethod
    def find_urls(self, url: str) -> List[str]:
        """Return crawlable page URLs discovered from `url` (feed/sitemap)."""
        pass
+
+
class TrafilaturaCrawlerConfig(BaseCrawlerConfig):
    """Crawler configuration backed by the trafilatura library.

    To understand these configuration params refer:
    https://trafilatura.readthedocs.io/
    """

    # Ask trafilatura for JSON so results can be parsed into dicts.
    # NOTE(review): plain underscore attribute on a pydantic model — confirm it
    # is treated as a class-level constant rather than a model field.
    _output_format: str = "json"
    TYPE: str = "Crawler"
    urls: List[str]
    include_comments: bool = False
    include_tables: bool = True
    no_fallback: bool = False
    include_images: bool = False
    include_formatting: bool = False
    deduplicate: bool = True
    no_ssl: bool = False
    is_feed: bool = False
    is_sitemap: bool = False
    include_links: bool = True
    target_language: Optional[str] = None
    url_blacklist: Optional[List[str]] = None

    def extract_url(self, url: str, url_id: Optional[str] = None) -> Dict[str, Any]:
        """Fetch `url` and return trafilatura's extraction as a dict.

        Returns an empty dict when trafilatura is missing, the fetch fails,
        or nothing could be extracted.
        """
        # BUGFIX: catch ImportError specifically instead of a bare `except:`,
        # which also swallowed KeyboardInterrupt/SystemExit.
        try:
            from trafilatura import extract, fetch_url
        except ImportError:
            logger.error("Trafilatura is not installed, install as follows: pip install trafilatura")
            return {}

        # Stable per-URL id derived from a murmur3 hash unless the caller supplies one.
        url_id = url_id or "{:02x}".format(mmh3.hash(url, signed=False))
        url_content = fetch_url(
            url=url,
            no_ssl=self.no_ssl,
        )
        extracted_dict: Dict[str, Any] = {}
        if url_content is not None:
            extracted_data = extract(
                filecontent=url_content,
                record_id=url_id,
                no_fallback=self.no_fallback,
                output_format=self._output_format,
                include_comments=self.include_comments,
                include_tables=self.include_tables,
                include_images=self.include_images,
                include_formatting=self.include_formatting,
                include_links=self.include_links,
                deduplicate=self.deduplicate,
                url_blacklist=self.url_blacklist,
                target_language=self.target_language,
            )

            if extracted_data:
                extracted_dict = json.loads(extracted_data)
                # Drop the duplicate raw-text field from the returned payload.
                if "raw-text" in extracted_dict:
                    del extracted_dict["raw-text"]

        return extracted_dict

    def find_urls(self, url: str) -> List[str]:
        """Discover crawlable page URLs from a sitemap or feed URL."""
        # BUGFIX: same narrow ImportError handling as extract_url.
        try:
            from trafilatura import feeds, sitemaps
        except ImportError:
            logger.error("Trafilatura is not installed, install as follows: pip install trafilatura")
            return []

        urls: List[str] = []
        if self.is_sitemap:
            urls = sitemaps.sitemap_search(url=url, target_lang=self.target_language)
        elif self.is_feed:
            urls = feeds.find_feed_urls(url=url, target_lang=self.target_language)

        return urls
+
+
class TrafilaturaCrawlerSource(BaseSource):
    """Source that crawls configured URLs (or URLs discovered via feed/sitemap)."""

    NAME: Optional[str] = "Crawler"

    def lookup(  # type: ignore[override]
        self, config: TrafilaturaCrawlerConfig, **kwargs: Any
    ) -> List[TextPayload]:
        """Crawl every resolved URL and return one payload per page."""
        source_responses: List[TextPayload] = []

        # Expand sitemap/feed URLs into concrete page URLs first.
        final_urls = []
        if config.is_sitemap or config.is_feed:
            for url in config.urls:
                final_urls.extend(config.find_urls(url=url))
        else:
            final_urls = config.urls

        for url in final_urls:
            extracted_data = config.extract_url(url=url)
            # BUGFIX: extract_url signals failure with an empty dict, never
            # None, so the original `is None` check could not trigger and the
            # 'text' access below raised KeyError instead of skipping.
            if not extracted_data:
                logger.warning(f"Unable to crawl {url}, hence skipping it")
                continue
            comments = (
                "" if "comments" not in extracted_data else extracted_data["comments"]
            )
            source_responses.append(
                TextPayload(
                    processed_text=f"{extracted_data['text']}. {comments}",
                    meta=extracted_data,
                    source_name=self.NAME,
                )
            )

        return source_responses
+
if __name__ == "__main__":
    # Configure the crawler with the article URL to fetch.
    article_url = "https://nld.com.vn/10-tro-ly-cap-cao-cua-tong-thong-han-quoc-dong-loat-tu-chuc-196241204104419743.htm"
    crawler_config = TrafilaturaCrawlerConfig(urls=[article_url])

    # Run the lookup and print each returned payload as pretty-printed JSON.
    crawler_source = TrafilaturaCrawlerSource()
    for payload in crawler_source.lookup(config=crawler_config):
        print(json.dumps(payload.dict(), indent=4, ensure_ascii=False))
diff --git a/obsei_module/obsei-master/obsei/source/youtube_reviews.py b/obsei_module/obsei-master/obsei/source/youtube_reviews.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/source/youtube_scrapper.py b/obsei_module/obsei-master/obsei/source/youtube_scrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..406a03a522ddd39d29c846731bf69ad2e78b7d32
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/source/youtube_scrapper.py
@@ -0,0 +1,111 @@
+import logging
+from datetime import datetime
+
+from pydantic import PrivateAttr
+from typing import Optional, List, Any, Dict
+
+from obsei.misc.utils import DEFAULT_LOOKUP_PERIOD, convert_utc_time, DATETIME_STRING_PATTERN
+from obsei.misc.youtube_reviews_scrapper import YouTubeCommentExtractor
+from obsei.payload import TextPayload
+from obsei.source.base_source import BaseSource, BaseSourceConfig
+
+logger = logging.getLogger(__name__)
+
+
class YoutubeScrapperConfig(BaseSourceConfig):
    """Configuration for scraping comments from a YouTube video page.

    Either `video_id` or `video_url` must be provided; the URL is derived
    from the id when absent.
    """

    # Template used to build video_url from video_id.
    _YT_VIDEO_URL: str = PrivateAttr('https://www.youtube.com/watch?v={video_id}')
    TYPE: str = "YoutubeScrapper"
    video_id: Optional[str] = None
    video_url: Optional[str] = None
    # Desktop Chrome user agent used for the scraping HTTP requests.
    user_agent: str = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    sort_by: int = 1  # 0 = sort by popular, 1 = sort by recent
    max_comments: Optional[int] = 20
    fetch_replies: bool = False
    lang_code: Optional[str] = None
    # Delay between scraping requests, in seconds.
    sleep_time: float = 0.1
    request_retries: int = 5
    lookup_period: Optional[str] = None

    def __init__(self, **data: Any):
        """Validate the model and derive `video_url` from `video_id` if needed.

        Raises:
            ValueError: if neither `video_id` nor `video_url` is given.
        """
        super().__init__(**data)

        if not self.video_id and not self.video_url:
            raise ValueError("Either `video_id` or `video_url` is required")

        if not self.video_url:
            self.video_url = self._YT_VIDEO_URL.format(video_id=self.video_id)
+
+
class YoutubeScrapperSource(BaseSource):
    """Source that scrapes YouTube video comments without the official API."""

    NAME: Optional[str] = "YoutubeScrapper"

    def lookup(self, config: YoutubeScrapperConfig, **kwargs: Any) -> List[TextPayload]:  # type: ignore[override]
        """Fetch comments newer than the stored checkpoint for the video."""
        source_responses: List[TextPayload] = []

        # Restore state for this workflow id, if any.
        identifier: Optional[str] = kwargs.get("id", None)
        # BUGFIX: the original tested the *builtin* `id` (always non-None),
        # so the store was queried even when no workflow id was supplied.
        state: Optional[Dict[str, Any]] = (
            None
            if identifier is None or self.store is None
            else self.store.get_source_state(identifier)
        )
        update_state: bool = bool(identifier)
        state = state or dict()

        lookup_period: str = state.get("since_time", config.lookup_period)
        lookup_period = lookup_period or DEFAULT_LOOKUP_PERIOD
        if len(lookup_period) <= 5:
            # Short values like "1d"/"12h" are relative periods.
            since_time = convert_utc_time(lookup_period)
        else:
            since_time = datetime.strptime(lookup_period, DATETIME_STRING_PATTERN)

        last_since_time: datetime = since_time
        since_id: Optional[str] = state.get("since_id", None)
        last_index = since_id

        comments: Optional[List[Dict[str, Any]]] = None
        try:
            if not config.video_url:
                raise RuntimeError("`video_url` in config should not be empty or None")

            scrapper: YouTubeCommentExtractor = YouTubeCommentExtractor(
                video_url=config.video_url,
                user_agent=config.user_agent,
                sort_by=config.sort_by,
                max_comments=config.max_comments,
                fetch_replies=config.fetch_replies,
                lang_code=config.lang_code,
                sleep_time=config.sleep_time,
                request_retries=config.request_retries,
            )

            comments = scrapper.fetch_comments(until_datetime=since_time)
        except RuntimeError as ex:
            # Best-effort: log and fall through with an empty comment list.
            logger.warning(ex.__cause__)

        comments = comments or []

        for comment in comments:
            source_responses.append(
                TextPayload(
                    processed_text=comment["text"],
                    meta=comment,
                    source_name=self.NAME,
                )
            )

            comment_time = comment["time"]

            # Track the newest comment time as the next checkpoint.
            if comment_time is not None and (
                last_since_time is None or last_since_time < comment_time
            ):
                last_since_time = comment_time
            if last_index is None:
                # Assuming list is sorted based on time
                last_index = comment["comment_id"]

        state["since_time"] = last_since_time.strftime(DATETIME_STRING_PATTERN)
        state["since_id"] = last_index

        if update_state and self.store is not None:
            self.store.update_source_state(workflow_id=identifier, state=state)

        return source_responses
diff --git a/obsei_module/obsei-master/obsei/workflow/__init__.py b/obsei_module/obsei-master/obsei/workflow/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/obsei_module/obsei-master/obsei/workflow/base_store.py b/obsei_module/obsei-master/obsei/workflow/base_store.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cd761bdacac404cc85e076010ad0e88eb397661
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/workflow/base_store.py
@@ -0,0 +1,34 @@
+from abc import abstractmethod
+from typing import Any, Dict, Optional
+
+from pydantic_settings import BaseSettings
+
+
class BaseStore(BaseSettings):
    """Abstract persistence interface for per-workflow source/sink/analyzer state."""

    @abstractmethod
    def get_source_state(self, id: str) -> Optional[Dict[str, Any]]:
        """Return the stored source state for workflow `id`, or None."""
        pass

    @abstractmethod
    def get_sink_state(self, id: str) -> Optional[Dict[str, Any]]:
        """Return the stored sink state for workflow `id`, or None."""
        pass

    @abstractmethod
    def get_analyzer_state(self, id: str) -> Optional[Dict[str, Any]]:
        """Return the stored analyzer state for workflow `id`, or None."""
        pass

    @abstractmethod
    def update_source_state(self, workflow_id: str, state: Dict[str, Any]) -> Optional[Any]:
        """Persist a new source state for the workflow."""
        pass

    @abstractmethod
    def update_sink_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        """Persist a new sink state for the workflow."""
        pass

    @abstractmethod
    def update_analyzer_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        """Persist a new analyzer state for the workflow."""
        pass

    @abstractmethod
    def delete_workflow(self, id: str) -> None:
        """Remove all stored data for workflow `id`."""
        pass
diff --git a/obsei_module/obsei-master/obsei/workflow/store.py b/obsei_module/obsei-master/obsei/workflow/store.py
new file mode 100644
index 0000000000000000000000000000000000000000..73501cf99d8e2c76bbb83ae046a94ac7b6a0738f
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/workflow/store.py
@@ -0,0 +1,198 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional
+from uuid import uuid4
+
+from pydantic import PrivateAttr
+from sqlalchemy import Column, DateTime, String, create_engine, func
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker
+
+from obsei.misc.utils import obj_to_json
+from obsei.workflow.base_store import BaseStore
+from obsei.workflow.workflow import WorkflowState, WorkflowConfig, Workflow
+
+logger = logging.getLogger(__name__)
+
Base = declarative_base()  # type: Any


class ORMBase(Base):  # type: ignore
    """Common columns shared by all tables: string-UUID primary key and timestamps."""

    __abstract__ = True

    # String UUID primary key, generated per row via the default callable.
    id = Column(String(100), default=lambda: str(uuid4()), primary_key=True)
    created = Column(DateTime, server_default=func.now())
    updated = Column(DateTime, server_default=func.now(), server_onupdate=func.now())
+
+
class WorkflowTable(ORMBase):
    """One row per workflow: serialized config plus per-stage JSON state blobs."""

    __tablename__ = "workflow"

    config = Column(String(2000), nullable=False)
    source_state = Column(String(500), nullable=True)
    sink_state = Column(String(500), nullable=True)
    analyzer_state = Column(String(500), nullable=True)
+
+
class WorkflowStore(BaseStore):
    """SQL-backed store (SQLite by default) for workflow configs and states."""

    _session: sessionmaker = PrivateAttr()

    def __init__(self, url: str = "sqlite:///obsei.db", **data: Any):
        """Connect to `url`, create missing tables and open a session."""
        super().__init__(**data)
        engine = create_engine(url)
        ORMBase.metadata.create_all(engine)
        local_session = sessionmaker(bind=engine)
        self._session = local_session()

    def get(self, identifier: str) -> Optional[Workflow]:
        """Return the workflow with the given id, or None if not found."""
        row = self._session.query(WorkflowTable).filter_by(id=identifier).all()
        return (
            None
            if row is None or len(row) == 0
            else self._convert_sql_row_to_workflow_data(row[0])
        )

    def get_all(self) -> List[Workflow]:
        """Return every stored workflow."""
        rows = self._session.query(WorkflowTable).all()
        return [self._convert_sql_row_to_workflow_data(row) for row in rows]

    def get_workflow_state(self, identifier: str) -> Optional[WorkflowState]:
        """Return the combined source/sink/analyzer state for a workflow.

        BUGFIX: Query.filter() takes criterion expressions, not keyword
        arguments; the original `filter(id=identifier)` raised TypeError.
        """
        row = (
            self._session.query(
                WorkflowTable.source_state,
                WorkflowTable.analyzer_state,
                WorkflowTable.sink_state,
            )
            .filter(WorkflowTable.id == identifier)
            .all()
        )

        return (
            None
            if row is None or len(row) == 0
            else self._convert_sql_row_to_workflow_state(row[0])
        )

    def get_source_state(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Return the parsed source state JSON for a workflow, or None."""
        row = (
            self._session.query(WorkflowTable.source_state)
            .filter(WorkflowTable.id == identifier)
            .all()
        )
        # Guard against an unknown workflow id (empty result set) too; the
        # original indexed row[0] unconditionally and raised IndexError.
        if not row or row[0].source_state is None:
            return None
        return json.loads(row[0].source_state)

    def get_sink_state(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Return the parsed sink state JSON for a workflow, or None.

        BUGFIX: same keyword-filter fix as get_workflow_state.
        """
        row = (
            self._session.query(WorkflowTable.sink_state)
            .filter(WorkflowTable.id == identifier)
            .all()
        )
        if not row or row[0].sink_state is None:
            return None
        return json.loads(row[0].sink_state)

    def get_analyzer_state(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Return the parsed analyzer state JSON for a workflow, or None.

        BUGFIX: same keyword-filter fix as get_workflow_state.
        """
        row = (
            self._session.query(WorkflowTable.analyzer_state)
            .filter(WorkflowTable.id == identifier)
            .all()
        )
        if not row or row[0].analyzer_state is None:
            return None
        return json.loads(row[0].analyzer_state)

    def add_workflow(self, workflow: Workflow) -> None:
        """Insert a new workflow row, serializing config and states to JSON."""
        self._session.add(
            WorkflowTable(
                id=workflow.id,
                config=obj_to_json(workflow.config),
                source_state=obj_to_json(workflow.states.source_state),
                sink_state=obj_to_json(workflow.states.sink_state),
                analyzer_state=obj_to_json(workflow.states.analyzer_state),
            )
        )
        self._commit_transaction()

    def update_workflow(self, workflow: Workflow) -> None:
        """Overwrite the config and all states of an existing workflow."""
        self._session.query(WorkflowTable).filter_by(id=workflow.id).update(
            {
                WorkflowTable.config: obj_to_json(workflow.config),
                WorkflowTable.source_state: obj_to_json(workflow.states.source_state),
                WorkflowTable.sink_state: obj_to_json(workflow.states.sink_state),
                WorkflowTable.analyzer_state: obj_to_json(
                    workflow.states.analyzer_state
                ),
            },
            synchronize_session=False,
        )
        self._commit_transaction()

    def update_workflow_state(self, workflow_id: str, workflow_state: WorkflowState) -> None:
        """Overwrite all three state columns of a workflow."""
        self._session.query(WorkflowTable).filter_by(id=workflow_id).update(
            {
                WorkflowTable.source_state: obj_to_json(workflow_state.source_state),
                WorkflowTable.sink_state: obj_to_json(workflow_state.sink_state),
                WorkflowTable.analyzer_state: obj_to_json(
                    workflow_state.analyzer_state
                ),
            },
            synchronize_session=False,
        )
        self._commit_transaction()

    def update_source_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        """Persist a new source state for the workflow."""
        self._session.query(WorkflowTable).filter_by(id=workflow_id).update(
            {WorkflowTable.source_state: obj_to_json(state)}, synchronize_session=False
        )
        self._commit_transaction()

    def update_sink_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        """Persist a new sink state for the workflow."""
        self._session.query(WorkflowTable).filter_by(id=workflow_id).update(
            {WorkflowTable.sink_state: obj_to_json(state)}, synchronize_session=False
        )
        self._commit_transaction()

    def update_analyzer_state(self, workflow_id: str, state: Dict[str, Any]) -> None:
        """Persist a new analyzer state for the workflow."""
        self._session.query(WorkflowTable).filter_by(id=workflow_id).update(
            {WorkflowTable.analyzer_state: obj_to_json(state)},
            synchronize_session=False,
        )
        self._commit_transaction()

    def delete_workflow(self, id: str) -> None:
        """Delete the workflow row with the given id."""
        self._session.query(WorkflowTable).filter_by(id=id).delete()
        self._commit_transaction()

    def _commit_transaction(self) -> Any:
        """Commit the session, rolling back (and re-raising) on failure."""
        try:
            self._session.commit()
        except Exception as ex:
            logger.error(f"Transaction rollback: {ex.__cause__}")
            # Rollback is important here otherwise self.session will be in
            # inconsistent state and next call will fail
            self._session.rollback()
            raise ex

    @staticmethod
    def _convert_sql_row_to_workflow_state(row: Any) -> Optional[WorkflowState]:
        """Build a WorkflowState from a row; None when no state columns are set."""
        if row is None:
            return None

        source_state_dict = (
            None if row.source_state is None else json.loads(row.source_state)
        )
        sink_state_dict = None if row.sink_state is None else json.loads(row.sink_state)
        analyzer_state_dict = (
            None if row.analyzer_state is None else json.loads(row.analyzer_state)
        )

        workflow_states: Optional[WorkflowState] = None
        if source_state_dict or sink_state_dict or analyzer_state_dict:
            workflow_states = WorkflowState(
                source_state=source_state_dict,
                sink_state=sink_state_dict,
                analyzer_state=analyzer_state_dict,
            )

        return workflow_states

    @staticmethod
    def _convert_sql_row_to_workflow_data(row: Any) -> Workflow:
        """Rehydrate a full Workflow (config + states) from a table row."""
        config_dict = json.loads(row.config)
        workflow = Workflow(
            id=row.id,
            config=WorkflowConfig(**config_dict),
            states=WorkflowStore._convert_sql_row_to_workflow_state(row),
        )
        return workflow
diff --git a/obsei_module/obsei-master/obsei/workflow/workflow.py b/obsei_module/obsei-master/obsei/workflow/workflow.py
new file mode 100644
index 0000000000000000000000000000000000000000..efd79d36988b1820781ecdd2c7527ed082178f23
--- /dev/null
+++ b/obsei_module/obsei-master/obsei/workflow/workflow.py
@@ -0,0 +1,38 @@
+from typing import Any, Dict, Optional
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+
+from obsei.analyzer.base_analyzer import BaseAnalyzerConfig
+from obsei.sink.base_sink import BaseSinkConfig
+from obsei.source.base_source import BaseSourceConfig
+
+
+class WorkflowConfig(BaseModel):
+    """Static configuration of a workflow: source, sink and analyzer configs,
+    plus an optional scheduling period in seconds."""
+
+    source_config: Optional[BaseSourceConfig] = None
+    sink_config: Optional[BaseSinkConfig] = None
+    analyzer_config: Optional[BaseAnalyzerConfig] = None
+    time_in_seconds: Optional[int] = None
+
+    class Config:
+        # Source/sink/analyzer configs are project types pydantic cannot validate natively.
+        arbitrary_types_allowed = True
+
+
+class WorkflowState(BaseModel):
+    """Mutable runtime state of a workflow, one optional dict per component."""
+
+    source_state: Optional[Dict[str, Any]] = None
+    sink_state: Optional[Dict[str, Any]] = None
+    analyzer_state: Optional[Dict[str, Any]] = None
+
+    class Config:
+        arbitrary_types_allowed = True
+        response_model_exclude_unset = True
+
+
+class Workflow(BaseModel):
+ id: str = str(uuid4())
+ config: WorkflowConfig
+ states: WorkflowState = Field(WorkflowState())
+
+ class Config:
+ arbitrary_types_allowed = True
+ response_model_exclude_unset = True
diff --git a/obsei_module/obsei-master/pyproject.toml b/obsei_module/obsei-master/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..5bc7e67dbbf32032c4ec7306fdba773e17620872
--- /dev/null
+++ b/obsei_module/obsei-master/pyproject.toml
@@ -0,0 +1,131 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+dynamic = ["version"]
+requires-python = ">=3.8"
+name = "obsei"
+authors = [{ name = "Lalit Pagaria", email = "lalit.pagaria@oraika.com" }]
+maintainers = [
+ { name = "Lalit Pagaria", email = "lalit.pagaria@oraika.com" },
+ { name = "Girish Patel", email = "girish.patel@oraika.com" }
+]
+description = "Obsei is an automation tool for text analysis needs"
+readme = "README.md"
+license = { text = "Apache Version 2.0" }
+
+keywords = [
+ "workflow",
+ "customer-support",
+ "customer-feedback",
+ "low-code",
+ "automation",
+ "cognitive-automation",
+ "social-listening",
+ "customer-feedback-analysis",
+ "customer-experience",
+ "market-research",
+ "nlp",
+ "oraika",
+ "obsei"
+]
+
+classifiers = [
+ "Development Status :: 2 - Pre-Alpha",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Customer Service",
+ "Intended Audience :: Science/Research",
+ "Intended Audience :: Information Technology",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+]
+
+dependencies = [
+ "pytz >= 2023.3.post1",
+ "pydantic >= 2.5.3",
+ "pydantic-settings >= 2.1.0",
+ "python-dateutil >= 2.8.2",
+ "SQLAlchemy >= 2.0.24",
+ "mmh3 >= 4.0.1",
+ "beautifulsoup4 >= 4.9.3",
+ "dateparser >= 1.2.0",
+ "requests >= 2.26.0",
+]
+
+
+[project.optional-dependencies]
+
+twitter-api = ["searchtweets-v2 >= 1.1.1"]
+google-play-scraper = ["google-play-scraper >= 1.2.4"]
+google-play-api = ["google-api-python-client >= 2.111.0"]
+app-store-scraper = ["app-store-reviews-reader >= 1.2"]
+reddit-scraper = ["reddit-rss-reader >= 1.3.2"]
+reddit-api = ["praw >= 7.7.1"]
+pandas = ["pandas >= 2.0.3"]
+google-news-scraper = ["GoogleNews >= 1.6.12"]
+facebook-api = ["python-facebook-api >= 0.15.0"]
+atlassian-api = ["atlassian-python-api >= 3.41.4"]
+elasticsearch = ["elasticsearch >= 8.11.1"]
+slack-api = ["slack-sdk >= 3.26.1"]
+
+source = [
+ "obsei[twitter-api,google-play-scraper,google-play-api,app-store-scraper]",
+ "obsei[reddit-scraper,reddit-api,pandas,google-news-scraper,facebook-api]",
+]
+
+sink = ["obsei[atlassian-api,elasticsearch,slack-api,pandas]"]
+
+analyzer = [
+ "torch >= 2.1.2",
+ "vaderSentiment >= 3.3.2",
+ "transformers >= 4.36.2",
+ "nltk >= 3.8.1",
+ "sentencepiece >= 0.1.99",
+ "presidio-analyzer >= 2.2.351",
+ "presidio-anonymizer >= 2.2.351",
+ "spacy >= 3.7.2",
+]
+
+dev = [
+ "pre-commit >= 2.20.0",
+ "black >= 22.10.0",
+ "mypy >= 0.991",
+ "types-requests",
+ "types-python-dateutil",
+ "types-PyYAML",
+ "types-dateparser",
+ "types-protobuf",
+ "types-pytz",
+ "pytest >= 7.2.0",
+ "pip-tools >= 6.10.0",
+ "coverage >= 6.5.0",
+]
+
+all = ["obsei[analyzer,source,sink]"]
+
+## GPL dependencies (these are optional)
+gpl = ["trafilatura >= 1.6.3"]
+
+[project.urls]
+repository = "https://github.com/obsei/obsei"
+homepage = "https://obsei.com"
+documentation = "https://obsei.com"
+changelog = "https://github.com/obsei/obsei/releases"
+
+[tool.hatch.build.targets.sdist]
+include = ["/obsei"]
+
+[tool.hatch.build.targets.wheel]
+packages = ["obsei"]
+
+[tool.hatch.version]
+path = "obsei/_version.py"
+
diff --git a/obsei_module/obsei-master/sample-ui/Dockerfile b/obsei_module/obsei-master/sample-ui/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..28f57d2ed7d39a985956290ad690cc3b0f1bf01b
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/Dockerfile
@@ -0,0 +1,19 @@
+# Image for running the Obsei demo UI (Streamlit) on port 8501.
+FROM python:3.10-slim-bullseye
+
+WORKDIR /home/user
+
+# Build tooling needed by some Python dependencies; clean apt caches in the
+# same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends curl pkg-config cmake git g++ \
+    && apt-get clean autoclean && apt-get autoremove -y \
+    && rm -rf /var/lib/{apt,dpkg,cache,log}/
+
+# Application sources and configuration.
+COPY ui.py /home/user/
+COPY utils.py /home/user/
+COPY config.yaml /home/user/
+COPY requirements.txt /home/user/
+
+RUN pip install --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+EXPOSE 8501
+
+CMD ["streamlit", "run", "ui.py"]
diff --git a/obsei_module/obsei-master/sample-ui/README.md b/obsei_module/obsei-master/sample-ui/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a16df6430ff4ed31b98c8e0a23f1c3f617ab395d
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/README.md
@@ -0,0 +1,22 @@
+## Demo UI
+
+This is a minimal UI that you can spin up to test Obsei. It's based on Streamlit and is very easy to extend for your own use.
+
+![Screenshot](https://raw.githubusercontent.com/obsei/obsei-resources/master/images/obsei-ui-demo.png)
+
+## Usage
+
+### Option 1: Local
+Execute in this folder:
+```shell
+pip install -r requirements.txt
+streamlit run ui.py
+```
+
+### Option 2: Container
+
+Just run
+```
+docker run -d --name obsei-ui -p 8501:8501 obsei/obsei-ui-demo
+```
+You can find the UI at `http://localhost:8501`
diff --git a/obsei_module/obsei-master/sample-ui/config.yaml b/obsei_module/obsei-master/sample-ui/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18cc7433b879c600a0313d2cceff6ff05e4cc4e2
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/config.yaml
@@ -0,0 +1,372 @@
+source:
+ Youtube Scrapper:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/Youtube.png"
+ _help_:
+ - '`video url` is Youtube video url.'
+ source:
+ _target_: obsei.source.youtube_scrapper.YoutubeScrapperSource
+ config:
+ _target_: obsei.source.youtube_scrapper.YoutubeScrapperConfig
+ video_url: "https://www.youtube.com/watch?v=uZfns0JIlFk"
+ lookup_period: "1Y"
+ max_comments: 10
+ Appstore Scrapper:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/appstore.png"
+ _help_:
+      - '`app_url` is application url on app store.'
+      - 'For example for Xcode - https://apps.apple.com/us/app/xcode/id497799835'
+ source:
+ _target_: obsei.source.appstore_scrapper.AppStoreScrapperSource
+ config:
+ _target_: obsei.source.appstore_scrapper.AppStoreScrapperConfig
+ app_url: "https://apps.apple.com/us/app/gmail-email-by-google/id422689480"
+ lookup_period: "1h"
+ max_count: 5
+ Playstore Scrapper:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/playstore.png"
+ _help_:
+ - '`app_url` is application url on play store'
+ - 'For example for Gmail - https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US'
+ source:
+ _target_: obsei.source.playstore_scrapper.PlayStoreScrapperSource
+ config:
+ _target_: obsei.source.playstore_scrapper.PlayStoreScrapperConfig
+ app_url: "https://play.google.com/store/apps/details?id=com.google.android.gm&hl=en_IN&gl=US"
+ lookup_period: "1h"
+ max_count: 5
+ Maps Reviews Scrapper:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/google_maps.png"
+ _help_:
+ - 'Collect `api_key` from https://outscraper.com/'
+ - ''
+ - 'For `queries` enter google maps urls or place ids, for example'
+ - "https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"
+ source:
+ _target_: obsei.source.google_maps_reviews.OSGoogleMapsReviewsSource
+ config:
+ _target_: obsei.source.google_maps_reviews.OSGoogleMapsReviewsConfig
+ api_key: ''
+ queries:
+ - "https://www.google.co.in/maps/place/Taj+Mahal/@27.1751496,78.0399535,17z/data=!4m5!3m4!1s0x39747121d702ff6d:0xdd2ae4803f767dde!8m2!3d27.1751448!4d78.0421422"
+ number_of_reviews: 5
+ Reddit Scrapper:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/reddit.png"
+ _help_:
+ - 'Reddit subreddit, search etc rss url. For proper url refer following link -'
+ - 'https://www.reddit.com/r/pathogendavid/comments/tv8m9/pathogendavids_guide_to_rss_and_reddit/'
+ source:
+ _target_: obsei.source.reddit_source.RedditScrapperSource
+ config:
+ _target_: obsei.source.reddit_source.RedditScrapperConfig
+ url: 'https://www.reddit.com/r/wallstreetbets/comments/.rss?sort=new'
+ lookup_period: "1h"
+ Twitter:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/twitter.png"
+ _help_:
+ - '`query` accept search string, @user or #hashtags also'
+ - ''
+ - 'Need twitter `consumer key` and `secret`, get it from https://developer.twitter.com/en/apply-for-access'
+ source:
+ _target_: obsei.source.twitter_source.TwitterSource
+ config:
+ _target_: obsei.source.twitter_source.TwitterSourceConfig
+ query: "@Twitter"
+ lookup_period: "1h"
+ max_tweets: 10
+ cred_info:
+ _target_: obsei.source.twitter_source.TwitterCredentials
+ consumer_key: ''
+ consumer_secret: ''
+ Facebook:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/facebook.png"
+ _help_:
+ - '`page_id` is id of your facebook page'
+ - ''
+ - 'Need facebook app_id, app_secret and long_term_token. Get it from https://developers.facebook.com/apps/'
+ source:
+ _target_: obsei.source.facebook_source.FacebookSource
+ config:
+ _target_: obsei.source.facebook_source.FacebookSourceConfig
+ page_id: "110844591144719"
+ lookup_period: "1h"
+ cred_info:
+ _target_: obsei.source.facebook_source.FacebookCredentials
+ app_id: ''
+ app_secret: ''
+ long_term_token: ''
+ Email:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/gmail.png"
+ _help_:
+ - 'List of IMAP servers for most commonly used email providers https://www.systoolsgroup.com/imap/'
+ - ''
+ - 'Also, if you are using a `Gmail` account then make sure you allow less secure apps on your account'
+ - 'https://myaccount.google.com/lesssecureapps?pli=1'
+ - 'Also enable IMAP access - https://mail.google.com/mail/u/0/#settings/fwdandpop'
+ source:
+ _target_: obsei.source.email_source.EmailSource
+ config:
+ _target_: obsei.source.email_source.EmailConfig
+ imap_server: 'imap.gmail.com'
+ cred_info:
+ _target_: obsei.source.email_source.EmailCredInfo
+ username: ''
+ password: ''
+ lookup_period: "1h"
+ Reddit:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/reddit.png"
+ _help_:
+ - 'Reddit account `username` and `password` require'
+ - 'Enter list of `subreddits`'
+ source:
+ _target_: obsei.source.reddit_source.RedditSource
+ config:
+ _target_: obsei.source.reddit_source.RedditConfig
+ subreddits:
+ - 'wallstreetbets'
+ cred_info:
+ _target_: obsei.source.reddit_source.RedditCredInfo
+ username: ''
+ password: ''
+ lookup_period: "1h"
+ Google News:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/googlenews.png"
+ _help_:
+ - '`fetch_article` use crawler to fetch full article'
+ source:
+ _target_: obsei.source.google_news_source.GoogleNewsSource
+ config:
+ _target_: obsei.source.google_news_source.GoogleNewsConfig
+ query: "bitcoin"
+ max_results: 3
+ lookup_period: "1d"
+ fetch_article: true
+ Website Crawler:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/webcrawler.png"
+ _help_:
+      - '`urls` is a list of website urls to crawl and analyze.'
+      - ''
+      - 'For example - https://obsei.github.io/obsei/'
+ source:
+ _target_: obsei.source.website_crawler_source.TrafilaturaCrawlerSource
+ config:
+ _target_: obsei.source.website_crawler_source.TrafilaturaCrawlerConfig
+ urls:
+ - 'https://obsei.github.io/obsei/'
+sink:
+ Panda Dataframe:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/pandas.svg"
+ _help_: null
+ sink:
+ _target_: obsei.sink.pandas_sink.PandasSink
+ config:
+ _target_: obsei.sink.pandas_sink.PandasSinkConfig
+ Logger:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/logger.png"
+ _help_: null
+ sink:
+ _target_: obsei.sink.logger_sink.LoggerSink
+ config:
+ _target_: obsei.sink.logger_sink.LoggerSinkConfig
+ Jira:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/jira.png"
+ _help_:
+ - 'For testing purpose you can start jira server locally'
+ - 'Refer https://developer.atlassian.com/server/framework/atlassian-sdk/atlas-run-standalone/'
+ - ''
+ - 'Provide `server url`, `username` and `password` of the user'
+ - ''
+ - '`type` of issue to be created, for more information refer -'
+ - 'https://support.atlassian.com/jira-cloud-administration/docs/what-are-issue-types/'
+ - ''
+ - '`project` in which issue to be created, for more information refer -'
+ - 'https://support.atlassian.com/jira-software-cloud/docs/what-is-a-jira-software-project/'
+ sink:
+ _target_: obsei.sink.jira_sink.JiraSink
+ config:
+ _target_: obsei.sink.jira_sink.JiraSinkConfig
+ url: 'http://localhost:2990/jira'
+ username: ''
+ password: ''
+ issue_type:
+ name: "Task"
+ project:
+ key: ""
+ Zendesk:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/zendesk.png"
+ _help_:
+ - 'For custom domain refer http://docs.facetoe.com.au/zenpy.html#custom-domains'
+ - 'Provide zendesk `domain`'
+ - ''
+ - 'Provide `subdomain` if you have one'
+ - ''
+ - 'Provide zendesk account `email` and `password`'
+ sink:
+ _target_: obsei.sink.zendesk_sink.ZendeskSink
+ config:
+ _target_: obsei.sink.zendesk_sink.ZendeskSinkConfig
+ domain: "zendesk.com"
+ subdomain: null
+ cred_info:
+ _target_: obsei.sink.zendesk_sink.ZendeskCredInfo
+ email: ''
+ password: ''
+ Slack:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/slack.svg"
+ _help_:
+ - 'Provide slack bot/app `token`, for more detail refer -'
+ - 'https://slack.com/intl/en-de/help/articles/215770388-Create-and-regenerate-API-tokens'
+ - ''
+ - 'To get `channel id` refer -'
+ - 'https://stackoverflow.com/questions/40940327/what-is-the-simplest-way-to-find-a-slack-team-id-and-a-channel-id'
+ sink:
+ _target_: obsei.sink.slack_sink.SlackSink
+ config:
+ _target_: obsei.sink.slack_sink.SlackSinkConfig
+ slack_token: ''
+ channel_id: ''
+ jinja_template: |
+ :bell: Hi there!, a new notification by *Obsei*
+ >Content:
+ ```
+ {%- for key, value in payload.items() recursive%}
+ {%- if value is mapping -%}
+ {{loop(value.items())}}
+ {%- else %}
+ {{key}}: {{value}}
+ {%- endif %}
+ {%- endfor%}
+ ```
+ Elastic:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/elastic.png"
+ _help_:
+ - 'For testing purpose you can start Elasticsearch server locally via docker'
+ - '`docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2`'
+ - ''
+ - ' Provide server `hostname`, `port` along with `index` to be used'
+ sink:
+ _target_: obsei.sink.elasticsearch_sink.ElasticSearchSink
+ config:
+ _target_: obsei.sink.elasticsearch_sink.ElasticSearchSinkConfig
+ host: "localhost"
+ port: 9200
+ index_name: "test"
+ Http:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/http_api.png"
+ _help_:
+ - 'For testing purpose you can create mock http server via postman, refer -'
+ - 'https://learning.postman.com/docs/designing-and-developing-your-api/mocking-data/setting-up-mock/'
+ - ''
+ - 'Provide http server `url` and `headers`'
+ sink:
+ _target_: obsei.sink.http_sink.HttpSink
+ config:
+ _target_: obsei.sink.http_sink.HttpSinkConfig
+ url: 'https://localhost:8080/api/path'
+ headers:
+ Content-type: "application/json"
+analyzer:
+ Sentiment:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/sentiment.png"
+ Transformer:
+ _help_:
+ - 'For supported models refer https://huggingface.co/models?filter=zero-shot-classification'
+ - ''
+ - 'Possible device values are `auto` (cuda:0 if available otherwise cpu), `cpu` and `cuda:{id}` (cuda device id)'
+ config:
+ _target_: obsei.analyzer.sentiment_analyzer.TransformersSentimentAnalyzerConfig
+ labels:
+ - "positive"
+ - "negative"
+ multi_class_classification: false
+ analyzer:
+ _target_: obsei.analyzer.sentiment_analyzer.TransformersSentimentAnalyzer
+ model_name_or_path: "typeform/mobilebert-uncased-mnli"
+ device: "auto"
+ Vader:
+ _help_:
+ - 'Vader is less resource hungry dictionary based Vader Sentiment detector'
+ analyzer:
+ _target_: obsei.analyzer.sentiment_analyzer.VaderSentimentAnalyzer
+ Classification:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/classification.png"
+ Transformer:
+ _help_:
+ - 'For supported models refer https://huggingface.co/models?filter=zero-shot-classification'
+ - ''
+ - 'Provide classification `labels`, two labels "positive" and "negative" are added by default'
+ - ''
+ - 'Possible device values are `auto` (cuda:0 if available otherwise cpu), `cpu` and `cuda:{id}` (cuda device id)'
+ config:
+ _target_: obsei.analyzer.classification_analyzer.ClassificationAnalyzerConfig
+ labels:
+ - "service"
+ - "content"
+ - "interface"
+ multi_class_classification: true
+ analyzer:
+ _target_: obsei.analyzer.classification_analyzer.ZeroShotClassificationAnalyzer
+ model_name_or_path: "typeform/mobilebert-uncased-mnli"
+ device: "auto"
+ Named Entity Recognition:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/ner.png"
+ Transformer:
+ _help_:
+ - 'For supported models refer https://huggingface.co/models?filter=token-classification'
+ - ''
+ - 'Possible device values are `auto` (cuda:0 if available otherwise cpu), `cpu` and `cuda:{id}` (cuda device id)'
+ analyzer:
+ _target_: obsei.analyzer.ner_analyzer.TransformersNERAnalyzer
+ model_name_or_path: "elastic/distilbert-base-cased-finetuned-conll03-english"
+ device: "auto"
+ Spacy:
+ _help_:
+ - 'For supported models refer https://spacy.io/models'
+ analyzer:
+ _target_: obsei.analyzer.ner_analyzer.SpacyNERAnalyzer
+ model_name_or_path: "en_core_web_sm"
+ Translation:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/translator.png"
+ Transformer:
+ _help_:
+ - 'For supported models refer https://huggingface.co/models?pipeline_tag=translation'
+ - ''
+ - 'Possible device values are `auto` (cuda:0 if available otherwise cpu), `cpu` and `cuda:{id}` (cuda device id)'
+ analyzer:
+ _target_: obsei.analyzer.translation_analyzer.TranslationAnalyzer
+ model_name_or_path: "Helsinki-NLP/opus-mt-en-hi"
+ device: "auto"
+ PII Anonymizer:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/pii.png"
+ Presidio:
+ _help_:
+ - '`analyze_only` decide whether to return only pii analysis or anonymize text'
+ - ''
+ - '`return_decision_process` decide whether to return detail information about anonymization decision'
+ - ''
+ - 'For `nlp_engine_name` spacy and stanza nlp engines are supported, For more info refer -'
+ - 'https://microsoft.github.io/presidio/analyzer/developing_recognizers/#utilize-spacy-or-stanza'
+ - ''
+ - 'Provide `model_name` and `lang_code` of the model'
+ config:
+ _target_: obsei.analyzer.pii_analyzer.PresidioPIIAnalyzerConfig
+ analyze_only: false
+ return_decision_process: false
+ analyzer:
+ _target_: obsei.analyzer.pii_analyzer.PresidioPIIAnalyzer
+ engine_config:
+ _target_: obsei.analyzer.pii_analyzer.PresidioEngineConfig
+ nlp_engine_name: "spacy"
+ models:
+ - _target_: obsei.analyzer.pii_analyzer.PresidioModelConfig
+ model_name: "en_core_web_md"
+ lang_code: "en"
+ Dummy:
+ _icon_: "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/dummy.png"
+ Dummy:
+ _help_:
+ - 'Dummy Analyzer, do nothing it simply used for transforming input to output'
+ config:
+ _target_: obsei.analyzer.dummy_analyzer.DummyAnalyzerConfig
+ analyzer:
+ _target_: obsei.analyzer.dummy_analyzer.DummyAnalyzer
diff --git a/obsei_module/obsei-master/sample-ui/requirements.txt b/obsei_module/obsei-master/sample-ui/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5bd699bdd5d799a2f7ae1f47386fd1e49181ad5
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/requirements.txt
@@ -0,0 +1,4 @@
+git+https://github.com/obsei/obsei@master#egg=obsei[all]
+streamlit
+trafilatura
+tornado>=6.3.2 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/obsei_module/obsei-master/sample-ui/ui.py b/obsei_module/obsei-master/sample-ui/ui.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce07ce6fb237f9daf68283f87f71d9b3cd6b6da3
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/ui.py
@@ -0,0 +1,96 @@
+from utils import *
+
+# Streamlit demo UI that lets a user compose an Obsei pipeline
+# (source/Observer -> analyzer -> sink/Informer) from config.yaml,
+# run it, and download the equivalent Python or YAML.
+current_path = pathlib.Path(__file__).parent.absolute().as_posix()
+configuration = get_obsei_config(current_path, "config.yaml")
+logo_url = "https://raw.githubusercontent.com/obsei/obsei-resources/master/logos/obsei_200x200.png"
+
+st.set_page_config(page_title="Obsei Demo", layout="wide", page_icon=logo_url)
+
+st.title("Obsei Demo").markdown(
+    get_icon_name("Obsei Demo", logo_url, 60, 35), unsafe_allow_html=True
+)
+
+st.success(
+    """
+Please ⭐ the repo and share the feedback at https://github.com/obsei/obsei?utm_source=streamlit
+    """
+)
+st.warning(
+    """
+**Note:** Demo run will require some secure information based on source or sink selected,
+if you don't trust this environment please close the app.
+"""
+)
+
+# Top row: pipeline summary, spinner placeholder, execute and download buttons.
+(
+    pipeline_col,
+    spinner_col,
+    execute_col,
+    download_python_col,
+    download_yaml_col,
+) = st.columns([2, 2, 1, 1, 1])
+
+# One column per pipeline stage.
+col_map = dict()
+col_map["source"], col_map["analyzer"], col_map["sink"] = st.columns([1, 1, 1])
+
+selected = {}
+name_map = {"source": "Observer", "analyzer": "Analyzer", "sink": "Informer"}
+
+# Stage selectors: one selectbox per stage, options taken from config.yaml.
+for node_name, col in col_map.items():
+    item_list = [k for k in configuration[node_name].keys()]
+    selected[node_name] = col.selectbox(f"Select {name_map[node_name]}", item_list)
+
+icons = [get_icon_name(None, configuration[k][v]["_icon_"]) for k, v in selected.items()]
+pipeline_col.header("Pipeline").markdown(
+    f"**Pipeline:** {icons[0]} ➡➡ {icons[1]} ➡➡ {icons[2]}",
+    unsafe_allow_html=True,
+)
+
+generate_config = {}
+log_component = {}
+for node_name, node_value in selected.items():
+    type_config = configuration[node_name][node_value]
+    # Analyzers have an extra nesting level (e.g. Transformer vs Vader):
+    # offer a sub-type selector and descend into the chosen entry.
+    if node_name == "analyzer":
+        type_list = []
+        for config_key in type_config.keys():
+            if config_key != "_icon_":
+                type_list.append(config_key)
+        selected_type = col_map[node_name].selectbox(f"{name_map[node_name]} Type", type_list)
+        type_config = type_config[selected_type]
+
+    config = None
+    if "config" in type_config:
+        config = type_config["config"]
+    if type_config["_help_"] is not None:
+        with col_map[node_name].expander("Config Help Info", False):
+            help_area = "\n".join(type_config["_help_"])
+            st.code(f"{help_area}")
+
+    # Editable config form for the stage, if it declares a config section.
+    config_expander = None
+    if config is not None:
+        config_expander = col_map[node_name].expander(f"Configure {name_map[node_name]}", False)
+        render_config(config, config_expander)
+
+    # Analyzers may also expose tunables (model name, device) on the
+    # analyzer object itself, not just on its config.
+    if node_name == "analyzer" and node_name in type_config and len(type_config[node_name]) > 1:
+        config_expander = config_expander or col_map[node_name].expander(f"Configure {name_map[node_name]}", False)
+        render_config(type_config["analyzer"], config_expander)
+
+    generate_config[node_name] = type_config[node_name]
+    generate_config[f"{node_name}_config"] = config
+
+    # Per-stage log placeholder, filled in by execute_workflow.
+    log_expander = col_map[node_name].expander(f"{name_map[node_name]} Logs", True)
+    log_component[node_name] = log_expander.empty()
+    log_component[node_name].write("Run \"🚀 Execute\" first")
+
+python_code = generate_python(generate_config)
+yaml_code = generate_yaml(generate_config)
+
+execute_button = execute_col.button("🚀 Execute")
+if execute_button:
+    execute_workflow(generate_config, spinner_col, log_component)
+
+with download_python_col:
+    download_button(python_code, "generated-code.py", "🐍 Download (.py)")
+
+with download_yaml_col:
+    download_button(yaml_code, "generated-config.yaml", "📖 Download (.yaml)")
diff --git a/obsei_module/obsei-master/sample-ui/utils.py b/obsei_module/obsei-master/sample-ui/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc10c38f86f86bebd6bb9e7fdd83af917bc6ed1
--- /dev/null
+++ b/obsei_module/obsei-master/sample-ui/utils.py
@@ -0,0 +1,216 @@
+import base64
+import logging
+import pathlib
+import re
+import sys
+import uuid
+
+import streamlit as st
+import yaml
+
+from obsei.configuration import ObseiConfiguration
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+
+def img_to_bytes(img_path):
+    """Read the file at `img_path` and return its contents base64-encoded as a str."""
+    img_bytes = pathlib.Path(img_path).read_bytes()
+    encoded = base64.b64encode(img_bytes).decode()
+    return encoded
+
+
+# Copied from https://github.com/jrieke/traingenerator/blob/main/app/utils.py
+def download_button(
+ object_to_download, download_filename, button_text # , pickle_it=False
+):
+ try:
+ # some strings <-> bytes conversions necessary here
+ b64 = base64.b64encode(object_to_download.encode()).decode()
+ except AttributeError as e:
+ b64 = base64.b64encode(object_to_download).decode()
+
+ button_uuid = str(uuid.uuid4()).replace("-", "")
+ button_id = re.sub("\d+", "", button_uuid)
+
+ custom_css = f"""
+ """
+
+ dl_link = (
+ custom_css
+ + f'{button_text}