Spaces:

cdactvm
/

Hindi_ASR

Running

File size: 12,816 Bytes

f63b5a2

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61185b34-45e0-4a78-a84b-2cedd08ad39a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Function to convert Hindi text to numerical representation\n",
    "# from isNumber import is_number\n",
    "\n",
    "# def text_to_int (textnum, numwords={}):\n",
    "#     units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
    "#             'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
    "#             'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
    "#     tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
    "#     scales = ['hundred', 'thousand', 'lac','million', 'billion', 'trillion']\n",
    "#     ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}\n",
    "#     ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
    "\n",
    "#     if not numwords:\n",
    "#         numwords['and'] = (1, 0)\n",
    "#         for idx, word in enumerate(units): numwords[word] = (1, idx)\n",
    "#         for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)\n",
    "#         for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)\n",
    "\n",
    "#     textnum = textnum.replace('-', ' ')\n",
    "\n",
    "#     current = result = 0\n",
    "#     curstring = ''\n",
    "#     onnumber = False\n",
    "#     lastunit = False\n",
    "#     lastscale = False\n",
    "\n",
    "#     def is_numword(x):\n",
    "#         if is_number(x):\n",
    "#             return True\n",
    "#         if word in numwords:\n",
    "#             return True\n",
    "#         return False\n",
    "\n",
    "#     def from_numword(x):\n",
    "#         if is_number(x):\n",
    "#             scale = 0\n",
    "#             increment = int(x.replace(',', ''))\n",
    "#             return scale, increment\n",
    "#         return numwords[x]\n",
    "\n",
    "#     for word in textnum.split():\n",
    "#         if word in ordinal_words:\n",
    "#             scale, increment = (1, ordinal_words[word])\n",
    "#             current = current * scale + increment\n",
    "#             if scale > 100:\n",
    "#                 result += current\n",
    "#                 current = 0\n",
    "#             onnumber = True\n",
    "#             lastunit = False\n",
    "#             lastscale = False\n",
    "#         else:\n",
    "#             for ending, replacement in ordinal_endings:\n",
    "#                 if word.endswith(ending):\n",
    "#                     word = \"%s%s\" % (word[:-len(ending)], replacement)\n",
    "\n",
    "#             if (not is_numword(word)) or (word == 'and' and not lastscale):\n",
    "#                 if onnumber:\n",
    "#                     # Flush the current number we are building\n",
    "#                     curstring += repr(result + current) + \" \"\n",
    "#                 curstring += word + \" \"\n",
    "#                 result = current = 0\n",
    "#                 onnumber = False\n",
    "#                 lastunit = False\n",
    "#                 lastscale = False\n",
    "#             else:\n",
    "#                 scale, increment = from_numword(word)\n",
    "#                 onnumber = True\n",
    "\n",
    "#                 if lastunit and (word not in scales):                                                                                                                                                                                                                                         \n",
    "#                     # Assume this is part of a string of individual numbers to                                                                                                                                                                                                                \n",
    "#                     # be flushed, such as a zipcode \"one two three four five\"                                                                                                                                                                                                                 \n",
    "#                     curstring += repr(result + current)                                                                                                                                                                                                                                       \n",
    "#                     result = current = 0                                                                                                                                                                                                                                                      \n",
    "\n",
    "#                 if scale > 1:                                                                                                                                                                                                                                                                 \n",
    "#                     current = max(1, current)                                                                                                                                                                                                                                                 \n",
    "\n",
    "#                 current = current * scale + increment                                                                                                                                                                                                                                         \n",
    "#                 if scale > 100:                                                                                                                                                                                                                                                               \n",
    "#                     result += current                                                                                                                                                                                                                                                         \n",
    "#                     current = 0                                                                                                                                                                                                                                                               \n",
    "\n",
    "#                 lastscale = False                                                                                                                                                                                                              \n",
    "#                 lastunit = False                                                                                                                                                \n",
    "#                 if word in scales:                                                                                                                                                                                                             \n",
    "#                     lastscale = True                                                                                                                                                                                                         \n",
    "#                 elif word in units:                                                                                                                                                                                                             \n",
    "#                     lastunit = True\n",
    "\n",
    "#     if onnumber:\n",
    "#         curstring += repr(result + current)\n",
    "\n",
    "#     return curstring\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a87b26d7-4a0e-4fdc-b03e-1537600faf65",
   "metadata": {},
   "outputs": [],
   "source": [
    "from isNumber import is_number  # Remove or replace this if unnecessary\n",
    "\n",
    "def text_to_int(textnum, numwords={}):\n",
    "    # Define units, tens, and scales including \"lac\"\n",
    "    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',\n",
    "            'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',\n",
    "            'sixteen', 'seventeen', 'eighteen', 'nineteen']\n",
    "    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']\n",
    "    scales = ['hundred', 'thousand', 'lac', 'million', 'billion', 'trillion']  # \"lac\" added\n",
    "    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5, 'eighth': 8, 'ninth': 9, 'twelfth': 12}\n",
    "    ordinal_endings = [('ieth', 'y'), ('th', '')]\n",
    "\n",
    "    if not numwords:\n",
    "        numwords['and'] = (1, 0)  # Handle \"one hundred and twenty\"\n",
    "        \n",
    "        # Add units, tens, and scales to numwords\n",
    "        for idx, word in enumerate(units):\n",
    "            numwords[word] = (1, idx)\n",
    "        for idx, word in enumerate(tens):\n",
    "            numwords[word] = (1, idx * 10)\n",
    "        \n",
    "        for idx, word in enumerate(scales):\n",
    "            numwords[word] = (10 ** (5 if word == 'lac' else idx * 3 or 2), 0)  # Handle \"lac\" as 10^5\n",
    "\n",
    "    # Remove hyphens and normalize input\n",
    "    textnum = textnum.replace('-', ' ')\n",
    "\n",
    "    current = result = 0\n",
    "    curstring = ''\n",
    "    onnumber = False\n",
    "    lastunit = False\n",
    "    lastscale = False\n",
    "\n",
    "    def is_numword(x):\n",
    "        return is_number(x) or x in numwords\n",
    "\n",
    "    def from_numword(x):\n",
    "        if is_number(x):\n",
    "            return 0, int(x.replace(',', ''))\n",
    "        return numwords[x]\n",
    "\n",
    "    for word in textnum.split():\n",
    "        if word in ordinal_words:\n",
    "            scale, increment = (1, ordinal_words[word])\n",
    "            current = current * scale + increment\n",
    "            if scale > 100:\n",
    "                result += current\n",
    "                current = 0\n",
    "            onnumber = True\n",
    "            lastunit = False\n",
    "            lastscale = False\n",
    "        else:\n",
    "            for ending, replacement in ordinal_endings:\n",
    "                if word.endswith(ending):\n",
    "                    word = f\"{word[:-len(ending)]}{replacement}\"\n",
    "\n",
    "            if not is_numword(word) or (word == 'and' and not lastscale):\n",
    "                if onnumber:\n",
    "                    curstring += repr(result + current) + \" \"\n",
    "                curstring += word + \" \"\n",
    "                result = current = 0\n",
    "                onnumber = False\n",
    "                lastunit = False\n",
    "                lastscale = False\n",
    "            else:\n",
    "                scale, increment = from_numword(word)\n",
    "                onnumber = True\n",
    "\n",
    "                if lastunit and word not in scales:\n",
    "                    curstring += repr(result + current) + \" \"\n",
    "                    result = current = 0\n",
    "\n",
    "                if scale > 1:\n",
    "                    current = max(1, current)\n",
    "\n",
    "                current = current * scale + increment\n",
    "\n",
    "                if scale >= 100:\n",
    "                    result += current\n",
    "                    current = 0\n",
    "\n",
    "                lastscale = word in scales\n",
    "                lastunit = word in units\n",
    "\n",
    "    if onnumber:\n",
    "        curstring += repr(result + current)\n",
    "\n",
    "    return curstring.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83997c73-e1b4-4863-b1df-d6de6153e80d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}