{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Edit-distance autocorrect\n",
    "\n",
    "Builds a vocabulary from a word list stored on Google Drive, then replaces each out-of-vocabulary word in a sentence with the vocabulary word at the smallest edit distance.\n",
    "\n",
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "wOvxbAShg-_s",
    "outputId": "0e9a0f9a-fd6e-4ce0-81f6-8da736bd06be"
   },
   "outputs": [],
   "source": [
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "THLGsHmchJ9g",
    "outputId": "d590fb47-7b15-4176-9b6e-719090ed2cbd"
   },
   "outputs": [],
   "source": [
    "%pip install textdistance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the vocabulary\n",
    "\n",
    "`w` holds every token of the word file in order (with repeats); `v` is the set of distinct tokens."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eFxAvy03hPCX"
   },
   "outputs": [],
   "source": [
    "import re\n",
    "from collections import Counter\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import textdistance\n",
    "from nltk.metrics.distance import edit_distance\n",
    "\n",
    "WORDS_PATH = '/content/drive/MyDrive/words.txt'\n",
    "\n",
    "# Read the word file once and lower-case it so lookups are case-insensitive.\n",
    "with open(WORDS_PATH, 'r', encoding='utf-8') as f:\n",
    "    file_name_data = f.read().lower()\n",
    "\n",
    "# Raw string: the backslash must reach the regex engine untouched.\n",
    "w = re.findall(r'\\w+', file_name_data)\n",
    "\n",
    "# Distinct tokens; a set gives constant-time membership tests.\n",
    "v = set(w)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "RPON8Pm7h9Dx",
    "outputId": "dd1309fd-3362-41c9-8f19-affe4739df3e"
   },
   "outputs": [],
   "source": [
    "print(f\"First 10 words: \\n{w[0:10]}\")\n",
    "print(f\"{len(v)} total words \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Correction function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "U4s_UDWKig11"
   },
   "outputs": [],
   "source": [
    "def edit(input_sentence):\n",
    "    \"\"\"Return `input_sentence` with out-of-vocabulary words replaced.\n",
    "\n",
    "    The sentence is split on whitespace. A token whose lower-cased form is in\n",
    "    the vocabulary `v` is kept as typed. Any other token is replaced by the\n",
    "    vocabulary word with the smallest edit distance to the lower-cased token;\n",
    "    ties go to the word that sorts first. Tokens are re-joined with single\n",
    "    spaces.\n",
    "\n",
    "    Args:\n",
    "        input_sentence: Text to correct.\n",
    "\n",
    "    Returns:\n",
    "        The corrected sentence. Replacement words are lower-case because the\n",
    "        vocabulary is lower-cased.\n",
    "    \"\"\"\n",
    "    corrected = []\n",
    "    for token in input_sentence.split():\n",
    "        lowered = token.lower()\n",
    "        # Known words pass through as typed. With an empty vocabulary there is\n",
    "        # nothing to correct against, so the token is also left alone.\n",
    "        if not v or lowered in v:\n",
    "            corrected.append(token)\n",
    "            continue\n",
    "        # Compare in lower case: the vocabulary is lower-cased, so a capital\n",
    "        # letter would otherwise count as a spurious edit.\n",
    "        # min() over (distance, word) pairs picks the nearest word and breaks\n",
    "        # ties by word order, independent of set iteration order.\n",
    "        _, closest = min((edit_distance(lowered, word), word) for word in v)\n",
    "        corrected.append(closest)\n",
    "    return ' '.join(corrected)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "c0af01o_i5X0",
    "outputId": "fff4600b-163d-40c8-ce3b-c0b735ec286e"
   },
   "outputs": [],
   "source": [
    "print(edit(\"My namee is uncele Steven\"))\n",
    "print(edit(\"moneeyeh is greeat\"))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "autocorrectreal.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}