varunj25 committed on
Commit
050de65
1 Parent(s): 4005740

Autocorrect python file

Browse files
Autocorrect/autocorrectreal.ipynb DELETED
@@ -1,131 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {
7
- "colab": {
8
- "base_uri": "https://localhost:8080/"
9
- },
10
- "id": "wOvxbAShg-_s",
11
- "outputId": "0e9a0f9a-fd6e-4ce0-81f6-8da736bd06be"
12
- },
13
- "outputs": [],
14
- "source": [
15
- "from google.colab import drive\n",
16
- "drive.mount('/content/drive')"
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": null,
22
- "metadata": {
23
- "colab": {
24
- "base_uri": "https://localhost:8080/"
25
- },
26
- "id": "THLGsHmchJ9g",
27
- "outputId": "d590fb47-7b15-4176-9b6e-719090ed2cbd"
28
- },
29
- "outputs": [],
30
- "source": [
31
- "!pip install textdistance"
32
- ]
33
- },
34
- {
35
- "cell_type": "code",
36
- "execution_count": null,
37
- "metadata": {
38
- "id": "eFxAvy03hPCX"
39
- },
40
- "outputs": [],
41
- "source": [
42
- "import re\n",
43
- "from collections import Counter\n",
44
- "import numpy as np\n",
45
- "import pandas as pd\n",
46
- "import textdistance\n",
47
- "\n",
48
- "w = []\n",
49
- "with open('/content/drive/MyDrive/words.txt', 'r') as f:\n",
50
- " file_name_data = f.read()\n",
51
- " file_name_data = file_name_data.lower()\n",
52
- " w = re.findall('\\w+', file_name_data)\n",
53
- "\n",
54
- "v = set(w)"
55
- ]
56
- },
57
- {
58
- "cell_type": "code",
59
- "execution_count": null,
60
- "metadata": {
61
- "colab": {
62
- "base_uri": "https://localhost:8080/"
63
- },
64
- "id": "RPON8Pm7h9Dx",
65
- "outputId": "dd1309fd-3362-41c9-8f19-affe4739df3e"
66
- },
67
- "outputs": [],
68
- "source": [
69
- "print(f\"First 10 words: \\n{w[0:10]}\")\n",
70
- "print(f\"{len(v)} total words \")"
71
- ]
72
- },
73
- {
74
- "cell_type": "code",
75
- "execution_count": null,
76
- "metadata": {
77
- "id": "U4s_UDWKig11"
78
- },
79
- "outputs": [],
80
- "source": [
81
- "from nltk.metrics.distance import edit_distance\n",
82
- "def edit(input_sentence):\n",
83
- " sentence = input_sentence.split()\n",
84
- " \n",
85
- " for i in sentence:\n",
86
- " if i.lower() in w:\n",
87
- " continue\n",
88
- " else:\n",
89
- " distances = ((edit_distance(i,\n",
90
- " word), word)\n",
91
- " for word in w)\n",
92
- " closest = min(distances)\n",
93
- " sentence[sentence.index(i)] = closest[1]\n",
94
- " output_sentence = ' '.join(sentence)\n",
95
- "\n",
96
- " return output_sentence"
97
- ]
98
- },
99
- {
100
- "cell_type": "code",
101
- "execution_count": null,
102
- "metadata": {
103
- "colab": {
104
- "base_uri": "https://localhost:8080/"
105
- },
106
- "id": "c0af01o_i5X0",
107
- "outputId": "fff4600b-163d-40c8-ce3b-c0b735ec286e"
108
- },
109
- "outputs": [],
110
- "source": [
111
- "print(edit(\"My namee is uncele Steven\"))\n",
112
- "print(edit(\"moneeyeh is greeat\"))"
113
- ]
114
- }
115
- ],
116
- "metadata": {
117
- "colab": {
118
- "name": "autocorrectreal.ipynb",
119
- "provenance": []
120
- },
121
- "kernelspec": {
122
- "display_name": "Python 3",
123
- "name": "python3"
124
- },
125
- "language_info": {
126
- "name": "python"
127
- }
128
- },
129
- "nbformat": 4,
130
- "nbformat_minor": 0
131
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Autocorrect/autocorrectreal.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """autocorrectreal.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1aH5mYp1dxyn55XMjtVUllBvg37nqGVir
8
+ """
9
+
10
+ from google.colab import drive
11
+ drive.mount('/content/drive')
12
+
13
+ !pip install textdistance
14
+
15
+ import re
16
+ from collections import Counter
17
+ import numpy as np
18
+ import pandas as pd
19
+ import textdistance
20
+
21
+ w = []
22
+ with open('/content/drive/MyDrive/words.txt', 'r') as f:
23
+ file_name_data = f.read()
24
+ file_name_data = file_name_data.lower()
25
+ w = re.findall('\w+', file_name_data)
26
+
27
+ print(f"First 10 words: \n{w[0:10]}")
28
+ print(f"{len(w)} total words ")
29
+
30
+ from nltk.metrics.distance import edit_distance
31
def edit(input_sentence):
    """Return *input_sentence* with out-of-vocabulary words autocorrected.

    The sentence is split on whitespace. Every token whose lowercase form
    is not in the module-level word list ``w`` is replaced by the vocabulary
    word with the smallest edit distance to it; ties are broken by taking
    the alphabetically smallest candidate. Tokens that are already in the
    vocabulary are kept exactly as typed. The tokens are then re-joined
    with single spaces, so runs of whitespace in the input are collapsed.

    Args:
        input_sentence: The text to correct.

    Returns:
        The corrected sentence as a single string. If the vocabulary is
        empty, the tokens are returned unchanged.
    """
    # Deduplicate once per call: O(1) membership tests and no repeated
    # distance computations for words that occur many times in ``w``.
    vocabulary = set(w)
    # Remember corrections so a typo repeated in the sentence is only
    # searched for once.
    corrections = {}

    tokens = input_sentence.split()
    for position, token in enumerate(tokens):
        lowered = token.lower()
        # Nothing to correct against, or the word is already known.
        if not vocabulary or lowered in vocabulary:
            continue
        if lowered not in corrections:
            # Compare the lowercased token: the vocabulary is lowercase, so
            # comparing the raw token would count capitals as edits.
            corrections[lowered] = min(
                (edit_distance(lowered, word), word) for word in vocabulary
            )[1]
        tokens[position] = corrections[lowered]

    return ' '.join(tokens)
44
+
45
+ print(edit("My namee is uncele sdtevven"))
46
+ print(edit("moneeyeh is greeat"))
47
+ print(edit("establishe that nitrgen is theh essentil vegchtable as of animal living matter"))