Vrushali committed on
Commit
d1f611f
·
1 Parent(s): f587433

Add new modules and update file paths

Browse files
.gitignore CHANGED
@@ -158,6 +158,5 @@ cython_debug/
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
- src/module/data/*
162
  data/*
163
- app
 
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
+ src/app/api/module/data/*
162
  data/*
 
src/app/api/module/audio_text.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from whisper_jax import FlaxWhisperPipline
2
+ # import jax.numpy as jnp
3
+ import whisper
4
+ print(whisper.__file__)
5
+ from openai import OpenAI
6
+ from module.config import OPENAI_API_KEY
7
+ import os
8
+
9
# Export the key BEFORE constructing the client: OpenAI() reads
# OPENAI_API_KEY from the environment at construction time, so the
# committed order (client first, env second) left the client without
# credentials when the key came only from module.config.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
client = OpenAI()
11
+
12
+
13
+ # def whisper_pipeline_tpu(audio):
14
+ # pipeline = FlaxWhisperPipline("openai/whisper-large-v3", dtype=jnp.bfloat16, batch_size=16)
15
+ # text = pipeline(audio)
16
+ # return text
17
+
18
+
19
+
20
def whisper_pipeline(audio_path):
    """Transcribe one audio file with the local Whisper "medium" model.

    The audio is padded/trimmed to Whisper's fixed 30-second window, the
    spoken language is auto-detected, and the decoded text is returned.
    """
    model = whisper.load_model("medium")
    # Load the audio and fit it to the model's 30-second context.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_path))
    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)
    # Identify the dominant spoken language from the spectrogram.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")
    # Decode with default options and surface the recognized text.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions())
    print(decoded.text)
    return decoded.text
36
+
37
+
38
+
39
+
40
+
41
def whisper_openai(audio_path):
    """Transcribe *audio_path* with OpenAI's hosted whisper-1 model.

    Returns the Transcription object from the API.  The file is opened in
    a context manager so the handle is closed even when the request fails
    (the committed version leaked the open file object).
    """
    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript
48
+
49
if __name__ == "__main__":
    # Demo entry point.  The committed code called whisper_pipeline() at
    # module level with no argument, which raises TypeError the moment the
    # module is imported; guard it and require a real audio path instead.
    import sys
    if len(sys.argv) > 1:
        print(whisper_pipeline(sys.argv[1]))
src/app/api/module/config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from decouple import config
2
+ import os
3
+
4
# OpenAI API key; defaults to "" so the module can be imported without it.
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
# Azure OCR key — intentionally no default: fail fast if AZURE is unset.
key = config("AZURE")
# Embedding model name.  NOTE: the misspelled `emmbedding_model` is kept for
# backward compatibility; new code should use the corrected alias below.
emmbedding_model = "text-embedding-3-large"
embedding_model = emmbedding_model

# Working data directory, resolved against the CURRENT working directory.
# NOTE(review): this depends on where the process is launched from — confirm
# callers always start in the repository root.
file_Directory = os.path.join(os.getcwd(), "data")

# Azure Cognitive Services OCR endpoint.
endpoint = "https://bintix-ocr.cognitiveservices.azure.com/"
src/app/api/module/image.ipynb ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "",
10
+ "evalue": "",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[1;31mRunning cells with 'catlognew' requires the ipykernel package.\n",
14
+ "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
15
+ "\u001b[1;31mCommand: 'conda install -n catlognew ipykernel --update-deps --force-reinstall'"
16
+ ]
17
+ }
18
+ ],
19
+ "source": [
20
+ "import cv2\n",
21
+ "import os\n",
22
+ "import numpy as np \n",
23
+ "from llm_vision import OpenAIVision\n",
24
+ "from ocr import azure_ocr\n",
25
+ "from prompts.base import base_prompt\n",
26
+ "from utils import extract_json_from_text\n",
27
+ "from vectorsearch import search , get_detail_df"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": null,
33
+ "metadata": {},
34
+ "outputs": [
35
+ {
36
+ "ename": "",
37
+ "evalue": "",
38
+ "output_type": "error",
39
+ "traceback": [
40
+ "\u001b[1;31mFailed to start the Kernel. \n",
41
+ "\u001b[1;31mUnable to start Kernel 'catlognew (Python)' due to a connection timeout. \n",
42
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "image_path = r\"data/remove_flash.jpg\""
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 3,
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "details = azure_ocr(image_path)"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 4,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "data": {
66
+ "text/plain": [
67
+ "'BRU\\nNOW 90/- ONLY'"
68
+ ]
69
+ },
70
+ "execution_count": 4,
71
+ "metadata": {},
72
+ "output_type": "execute_result"
73
+ }
74
+ ],
75
+ "source": [
76
+ "details"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 5,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "prompt = base_prompt.format(text = details)"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 6,
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "obj = OpenAIVision()\n",
95
+ "json = obj.get_image_description(image_path,prompt)"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 7,
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "data": {
105
+ "text/plain": [
106
+ "'```json\\n{\\n \"brand\": \"BRU\",\\n \"mrp\": \"90/-\",\\n \"unit\": \"null\",\\n \"Quantity\": 1,\\n \"parent_category\": \"BEVERAGES\",\\n \"ingredients\": \"null\",\\n \"calorie_count\": \"null\",\\n \"marketed_by\": \"null\",\\n \"manufactured_by\": \"null\",\\n \"manufactured_in_country\": \"null\",\\n \"type_of_packaging\": \"null\",\\n \"promotion_on_the_pack\": \"NEW 90/- ONLY\",\\n \"type_of_product\": \"Instant Coffee\",\\n \"pack_of_or_no_of_units\": \"null\"\\n}\\n```'"
107
+ ]
108
+ },
109
+ "execution_count": 7,
110
+ "metadata": {},
111
+ "output_type": "execute_result"
112
+ }
113
+ ],
114
+ "source": [
115
+ "json['choices'][0]['message']['content']"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 8,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "response = extract_json_from_text(json['choices'][0]['message']['content'])"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 9,
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "name": "stdout",
134
+ "output_type": "stream",
135
+ "text": [
136
+ "{'brand': 'BRU', 'mrp': '90/-', 'unit': 'null', 'Quantity': 1, 'parent_category': 'BEVERAGES', 'ingredients': 'null', 'calorie_count': 'null', 'marketed_by': 'null', 'manufactured_by': 'null', 'manufactured_in_country': 'null', 'type_of_packaging': 'null', 'promotion_on_the_pack': 'NEW 90/- ONLY', 'type_of_product': 'Instant Coffee', 'pack_of_or_no_of_units': 'null'}\n"
137
+ ]
138
+ }
139
+ ],
140
+ "source": [
141
+ "print(response)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 10,
147
+ "metadata": {},
148
+ "outputs": [
149
+ {
150
+ "ename": "ImportError",
151
+ "evalue": "Could not import chromadb python package. Please install it with `pip install chromadb`.",
152
+ "output_type": "error",
153
+ "traceback": [
154
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
155
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
156
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
157
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/__init__.py:5\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclient\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AdminClient \u001b[38;5;28;01mas\u001b[39;00m AdminClientCreator\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mauth\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtoken\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TokenTransportHeader\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
158
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/auth/token/__init__.py:26\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m System\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 27\u001b[0m OpenTelemetryGranularity,\n\u001b[1;32m 28\u001b[0m trace_method,\n\u001b[1;32m 29\u001b[0m )\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_class\n",
159
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py:5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, Optional, Sequence, Union\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m trace\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msdk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresources\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SERVICE_NAME, Resource\n",
160
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/trace/__init__.py:87\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeprecated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deprecated\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context \u001b[38;5;28;01mas\u001b[39;00m context_api\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mattributes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BoundedAttributes \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
161
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/context/__init__.py:25\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvironment_variables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OTEL_PYTHON_CONTEXT\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_importlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m entry_points\n\u001b[1;32m 27\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n",
162
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/util/_importlib_metadata.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright The OpenTelemetry Authors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# FIXME: Use importlib.metadata when support for 3.11 is dropped if the rest of\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# the supported versions at that time have the same API.\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mimportlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 18\u001b[0m EntryPoint,\n\u001b[1;32m 19\u001b[0m EntryPoints,\n\u001b[1;32m 20\u001b[0m entry_points,\n\u001b[1;32m 21\u001b[0m version,\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# The importlib-metadata library has introduced breaking changes before to its\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# API, this module is kept just to act as a layer between the\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# importlib-metadata library and our project if in any case it is necessary to\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# do so.\u001b[39;00m\n",
163
+ "\u001b[0;31mImportError\u001b[0m: cannot import name 'EntryPoint' from 'importlib_metadata' (unknown location)",
164
+ "\nDuring handling of the above exception, another exception occurred:\n",
165
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
166
+ "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
167
+ "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
168
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
169
+ "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
170
+ ]
171
+ },
172
+ {
173
+ "ename": "",
174
+ "evalue": "",
175
+ "output_type": "error",
176
+ "traceback": [
177
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
178
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
179
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
180
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
181
+ ]
182
+ }
183
+ ],
184
+ "source": [
185
+ "\n",
186
+ "name = response['brand'] + \" \" + response['type_of_product']\n",
187
+ "name = \"BRU\"\n",
188
+ "get_prod_name_db = search(name)"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": null,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": []
197
+ }
198
+ ],
199
+ "metadata": {
200
+ "kernelspec": {
201
+ "display_name": "catlog",
202
+ "language": "python",
203
+ "name": "python3"
204
+ },
205
+ "language_info": {
206
+ "codemirror_mode": {
207
+ "name": "ipython",
208
+ "version": 3
209
+ },
210
+ "file_extension": ".py",
211
+ "mimetype": "text/x-python",
212
+ "name": "python",
213
+ "nbconvert_exporter": "python",
214
+ "pygments_lexer": "ipython3",
215
+ "version": "3.10.0"
216
+ }
217
+ },
218
+ "nbformat": 4,
219
+ "nbformat_minor": 2
220
+ }
src/app/api/module/image_enhance.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ from config import file_Directory
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
class Image_Enhance():
    """OpenCV enhancement passes for product photos.

    Each method reads an image from disk, applies one transform, writes the
    result under `file_Directory`, and (where useful) returns the new path.
    """

    def __init__(self, image_path) -> None:
        # Path of the source image every pipeline starts from.
        self.image_path = image_path

    def brightness_Adjust(self):
        """Brighten / contrast-stretch the source image; return output path."""
        image = cv2.imread(self.image_path)
        # Gain (contrast) and bias (brightness) for convertScaleAbs.
        # Bug fix: the committed alpha was -1.1; convertScaleAbs computes
        # |alpha*pixel + beta|, so a negative gain inverts the image,
        # contradicting the stated intent of a brightness adjustment.
        alpha = 1.1
        beta = 70
        image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
        imagepth = os.path.join(file_Directory, 'Brightness & contrast.jpg')
        cv2.imwrite(imagepth, image2)
        return imagepth

    def remove_flash(self, imagepth):
        """Mask out flashlight glare while keeping text regions."""
        image = cv2.imread(imagepth)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Segment text with adaptive thresholding.
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
        # Blur to suppress noise before glare detection.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Very bright pixels (>240) are treated as glare.
        _, mask = cv2.threshold(blurred, 240, 255, cv2.THRESH_BINARY_INV)
        # Combine the text and non-glare masks.
        mask = cv2.bitwise_or(mask, thresh)
        # Morphological closing removes small residual glare specks.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        result = cv2.bitwise_and(image, image, mask=mask)
        cv2.imwrite(os.path.join(file_Directory, 'remove_flash.jpg'), result)

    def sharpen(self, imagepth):
        """Apply a 3x3 sharpening kernel; return the output path."""
        image = cv2.imread(imagepth)
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        sharpened_image = cv2.filter2D(image, -1, kernel)
        imagepath = os.path.join(file_Directory, 'sharpened_image.jpg')
        cv2.imwrite(imagepath, sharpened_image)
        return imagepath

    def lapacian_sharpen(self, imagepth):
        """Write the Laplacian (edge emphasis) of the image to disk."""
        image = cv2.imread(imagepth)
        sharpened_image2 = cv2.Laplacian(image, cv2.CV_64F)
        imagepath = os.path.join(file_Directory, 'Laplacian_sharpened_image.jpg')
        cv2.imwrite(imagepath, sharpened_image2)

    def removing_noise(self, imagepth):
        """Median-filter the image to remove salt-and-pepper noise."""
        image = cv2.imread(imagepth)
        # Bug fix: a kernel size of 1 makes medianBlur a no-op; 3 is the
        # smallest aperture that actually filters.
        filtered_image = cv2.medianBlur(image, 3)
        imagepath = os.path.join(file_Directory, 'Median Blur.jpg')
        cv2.imwrite(imagepath, filtered_image)
        return imagepath

    def enhance_color(self, imagepth):
        """Rebalance hue/saturation/value and write the recolored image."""
        image = cv2.imread(imagepth)
        # Bug fix: cv2.imread returns BGR, so the forward conversion must be
        # BGR2HSV — the committed RGB2HSV swapped red/blue (the reverse
        # conversion below was already HSV2BGR, confirming the mismatch).
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # Work in float and clip so the saturation boost cannot wrap around
        # uint8 (e.g. 200 * 1.5 would overflow and alias to a small value).
        hsv = image.astype(np.float32)
        hsv[:, :, 0] = hsv[:, :, 0] * 0.7   # hue
        hsv[:, :, 1] = hsv[:, :, 1] * 1.5   # saturation
        hsv[:, :, 2] = hsv[:, :, 2] * 0.5   # value
        image = np.clip(hsv, 0, 255).astype(np.uint8)
        image2 = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        imagepath = os.path.join(file_Directory, 'enhanced coloured.jpg')
        cv2.imwrite(imagepath, image2)
103
+
104
+
105
if __name__ == "__main__":
    # Demo pipeline — guarded so importing this module no longer runs a full
    # enhancement pass with hard-coded paths (the committed code executed
    # all of this at import time).
    obj = Image_Enhance(r"data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png")
    pth = obj.brightness_Adjust()
    sharpen = obj.sharpen(pth)
    lapacian_sharpen = obj.lapacian_sharpen(sharpen)
    noise = obj.removing_noise(sharpen)
    obj.enhance_color(noise)
    obj.remove_flash(sharpen)
src/app/api/module/llm_vision.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import requests
3
+ from config import OPENAI_API_KEY
4
+ import os
5
+
6
+
7
+ """
8
+ openai_vision = OpenAIVision(api_key)
9
+ image_path = "path_to_your_image.jpg"
10
+ prompt = ""
11
+ response = openai_vision.get_image_description(prompt,image_path)
12
+ """
13
+
14
+ class OpenAIVision:
15
+ def __init__(self):
16
+ self.api_key = OPENAI_API_KEY
17
+ self.base_url = "https://api.openai.com/v1/chat/completions"
18
+
19
+ def __encode_image(self, image_path):
20
+ with open(image_path, "rb") as image_file:
21
+ return base64.b64encode(image_file.read()).decode('utf-8')
22
+
23
+ def get_image_description(self, image_path, prompt):
24
+ base64_image = self.__encode_image(image_path)
25
+
26
+ headers = {
27
+ "Content-Type": "application/json",
28
+ "Authorization": f"Bearer {self.api_key}"
29
+ }
30
+
31
+ payload = {
32
+ "model": "gpt-4-vision-preview",
33
+ "temperature": 0.0,
34
+ "messages": [
35
+ {
36
+ "role": "user",
37
+ "content": [
38
+ {
39
+ "type": "text",
40
+ "text": prompt,
41
+ },
42
+ {
43
+ "type": "image_url",
44
+ "image_url": {
45
+ "url": f"data:image/jpeg;base64,{base64_image}"
46
+ }
47
+ }
48
+
49
+ ]
50
+
51
+ }
52
+ ],
53
+ "max_tokens": 1000,
54
+
55
+ }
56
+
57
+ response = requests.post(self.base_url, headers=headers, json=payload)
58
+ return response.json()
src/app/api/module/ocr.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from azure.ai.formrecognizer import DocumentAnalysisClient
2
+ from azure.core.credentials import AzureKeyCredential
3
+ from config import key, endpoint
4
+ import easyocr
5
+
6
def azure_ocr(image_path):
    """Best-effort OCR via Azure Document Intelligence ("prebuilt-read").

    Returns the recognized text content, or "" when the service call fails
    (errors are printed rather than raised).
    """
    try:
        # Client is built per call from the module-level endpoint/key config.
        analysis_client = DocumentAnalysisClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        with open(image_path, "rb") as image_file:
            poller = analysis_client.begin_analyze_document(
                "prebuilt-read", document=image_file
            )
            return poller.result().content
    except Exception as e:
        print('Error occurred:', e)
        return ""
23
+
24
def easy_ocr(image_path):
    """Best-effort OCR with EasyOCR across English plus five Indian scripts.

    Returns EasyOCR's raw detection list, or [] when anything fails
    (errors are printed rather than raised).
    """
    try:
        langs = ['en', 'hi', 'bn', 'mr', 'ta', 'te']
        return easyocr.Reader(langs).readtext(image_path)
    except Exception as e:
        print('Error occurred:', e)
        return []
src/app/api/module/product_description.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ from llm_vision import OpenAIVision
6
+ from ocr import azure_ocr
7
+ from prompts.base import base_prompt
8
+ from utils import extract_json_from_text
9
+ from vectorsearch import search , get_detail_df
10
+
11
+
12
+
13
def get_product_description(image_path):
    """OCR the product image, prompt the vision LLM, and parse its JSON reply.

    Returns the extracted product-attribute dict.
    """
    details = azure_ocr(image_path)
    prompt = base_prompt.format(text=details)
    obj = OpenAIVision()
    # Renamed local from `json` — the committed name shadowed the stdlib module.
    api_response = obj.get_image_description(image_path, prompt)
    response = extract_json_from_text(api_response['choices'][0]['message']['content'])
    return response
+
22
+ def add_in_db(response):
23
+ name = response['brand'] + " " + response['type_of_product']
24
+ get_prod_name_db = search(name)
25
+ name = get_detail_df(get_prod_name_db)
26
+ ### Add things into database
src/app/api/module/prompts/base.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from textwrap import dedent

# Prompt template for the product-attribute extraction LLM call.
# `{text}` is the only format placeholder and is filled with the OCR output
# via base_prompt.format(text=...).
# NOTE(review): formatting reconstructed from a diff view — the original
# indentation inside the dedent() literal is not visible here; confirm the
# rendered string against the committed file.
base_prompt = dedent("""
### Instruction:

product description starts here

{text}

product description ends here

this is the categorys list ['BEVERAGES', 'SNACKS & BRANDED FOODS', 'NOT FOUND', 'EGGS, MEAT & FISH', 'FOODGRAINS, OIL & MASALA', 'PERSONAL CARE', 'CLEANING & HOUSEHOLD', 'FRUITS & VEGETABLES', 'BAKERY, CAKES & DAIRY', 'MAKEUP', 'BABY CARE', 'PET FOOD & ACCESSORIES', 'NON FMCG', 'ALCOHOL & TOBACCO', 'WELLNESS', 'EVERYDAY MEDICINE-NEW', 'EXCERCISE & FITNESS', 'ALCOHOLIC BEVERAGES'].

Get the text from the product image and the above product description to give me the following details in JSON format:
( return "null" where you don't have a answer)

"brand": "sample_brand",
"mrp": "The price might start with MRP or Rs.",
"unit": "per pack",
"Quantity": 1, ##num of products visible
"parent_category": "from the above given list",
"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
"calorie_count": "Would be in numbers",
"marketed_by": "sample_marketer",
"manufactured_by": "sample_manufacturer",
"manufactured_in_country": "Country XYZ",
"type_of_packaging": "Box",
"promotion_on_the_pack": "if any",
"type_of_product": "give this your understanding",
"pack_of_or_no_of_units": "No. of Units"


Analyse data from the above product description to give me the following details in JSON format:
Only return the output in the required json format.
""")
src/app/api/module/utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
# with open('Category-tree.json') as f:
#     cat_data = json.load(f)
# NOTE(review): `cat_data` is referenced by get_childs()/get_inner_child()
# below, but its load above is commented out — those helpers raise NameError
# until this is restored.

# Closed set of top-level catalog categories used for classification.
candidate_labels = ['BEVERAGES', 'SNACKS & BRANDED FOODS', 'NOT FOUND', 'EGGS, MEAT & FISH',
                    'FOODGRAINS, OIL & MASALA', 'PERSONAL CARE', 'CLEANING & HOUSEHOLD',
                    'FRUITS & VEGETABLES', 'BAKERY, CAKES & DAIRY', 'MAKEUP', 'BABY CARE',
                    'PET FOOD & ACCESSORIES', 'NON FMCG', 'TOBACCO', 'WELLNESS', 'ALCOHOLIC BEVERAGES']
10
+
11
def get_childs(parent):
    # Return the names of the immediate children of *parent* in the category
    # tree.  NOTE(review): depends on module-level `cat_data`, whose JSON
    # load is commented out above — as committed, calling this raises
    # NameError.
    catagories = []
    for category in cat_data:
        if category['name'] == parent:
            for child in category['children']:
                catagories.append(child['name'])
    return catagories

def get_inner_child(to_find_parent,to_find_child):
    # Return the grandchildren names under parent -> child in the category
    # tree.  Same NameError caveat as get_childs(): `cat_data` is undefined
    # as committed.
    catagories = []
    for parent in cat_data:
        if parent['name'] == to_find_parent:
            for child in parent['children']:
                if child['name'] == to_find_child:
                    for inner_child in child['children']:
                        catagories.append(inner_child['name'])
    return catagories
28
+
29
+
30
+
31
def extract_json_from_text(text):
    """Extract and parse the outermost {...} JSON object embedded in *text*.

    Returns the parsed object, or None when no valid JSON object is found.
    (The committed version returned the exception object itself on failure,
    which callers then treated as a dict.)
    """
    text = str(text)
    try:
        # Slice from the first '{' to the last '}' and parse that span.
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        return json.loads(text[start_index:end_index].strip())
    except (ValueError, TypeError) as e:
        # \033[31m ... \033[0m renders red; the committed reset code was
        # missing its escape character ("[0m" printed literally).
        print(f"\033[31m Exception occurred while loading JSON: {e} \033[0m")
        return None
src/app/api/module/vectorsearch.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from config import OPENAI_API_KEY, file_Directory
3
+ from langchain_community.document_loaders.csv_loader import CSVLoader
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain_community.vectorstores import Chroma
7
+ import pandas as pd
8
+
9
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY  # langchain's OpenAI clients read the key from the environment
10
+
11
+
12
+ # df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
13
+ # df_new = pd.DataFrame(columns=["id", "name"])
14
+ # df_new = df['name']
15
+ # df_new.to_csv(r"data/data.csv", index=False)
16
+
17
def create_vector():
    """Build (or rebuild) the persisted Chroma vector store from data/data.csv.

    Loads catalog rows, chunks them, embeds with OpenAI, and persists the
    store under <file_Directory>/vectorstore.
    """
    loader = CSVLoader(file_path="data/data.csv")
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(docs)
    db_path = os.path.join(file_Directory, "vectorstore")
    embeddings = OpenAIEmbeddings()
    os.makedirs(db_path, exist_ok=True)
    # Bug fix: the committed code embedded the raw `docs`, silently
    # discarding the split `documents` it had just computed.
    Chroma.from_documents(documents, embeddings, persist_directory=db_path)
26
+
27
def search(query):
    """Return the page content of the closest vector-store match for *query*.

    Returns None when the store yields no results (the committed version
    raised IndexError on an empty result list).  Reuses a single
    OpenAIEmbeddings instance instead of constructing a second one just to
    embed the query.
    """
    embeddings = OpenAIEmbeddings()
    db_path = os.path.join(file_Directory, "vectorstore")
    db = Chroma(persist_directory=db_path, embedding_function=embeddings)
    embedding_vector = embeddings.embed_query(query)
    docs = db.similarity_search_by_vector(embedding_vector)
    if not docs:
        return None
    print(docs[0].page_content)
    return docs[0].page_content
35
+
36
+
37
def get_detail_df(name, df=None):
    """Return the catalog row whose 'name' column equals *name*, or None.

    Parameters
    ----------
    name : str
        Product name to look up.
    df : pandas.DataFrame, optional
        Catalog table to search; when omitted, the default ONDC sample
        spreadsheet is loaded from its hard-coded path (backward compatible
        with the original single-argument signature).

    Bug fixes: the committed version iterated ``df.iterrows()`` incorrectly
    (each item is an (index, row) tuple, so ``item['name']`` raised) and its
    loop-body ``else: return None`` bailed out after inspecting only the
    first row.
    """
    if df is None:
        df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
    for _, row in df.iterrows():
        if row['name'] == name:
            return row
    return None
44
+
45
+ if __name__ == "__main__":
46
+ create_vector()
47
+ name = search("Choco Creme Wafers")
48
+ print(get_detail_df(name))