Vrushali committed on
Commit
d1f611f
·
1 Parent(s): f587433

Add new modules and update file paths

Browse files
.gitignore CHANGED
@@ -158,6 +158,5 @@ cython_debug/
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
- src/module/data/*
162
  data/*
163
- app
 
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
+ src/app/api/module/data/*
162
  data/*
 
src/app/api/module/audio_text.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from whisper_jax import FlaxWhisperPipline
2
+ # import jax.numpy as jnp
3
+ import whisper
4
+ print(whisper.__file__)
5
+ from openai import OpenAI
6
+ from module.config import OPENAI_API_KEY
7
+ import os
8
+
9
# Export the key BEFORE constructing the client: OpenAI() reads
# OPENAI_API_KEY from the environment at construction time, so the
# committed order (client first, env second) left the client without
# credentials when the key came only from module.config.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
client = OpenAI()
11
+
12
+
13
+ # def whisper_pipeline_tpu(audio):
14
+ # pipeline = FlaxWhisperPipline("openai/whisper-large-v3", dtype=jnp.bfloat16, batch_size=16)
15
+ # text = pipeline(audio)
16
+ # return text
17
+
18
+
19
+
20
def whisper_pipeline(audio_path):
    """Transcribe one audio file with the local Whisper "medium" model.

    The audio is padded/trimmed to Whisper's fixed 30-second window, the
    spoken language is auto-detected, and the decoded text is returned.
    """
    model = whisper.load_model("medium")
    # Load the audio and fit it to the model's 30-second context.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_path))
    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)
    # Identify the dominant spoken language from the spectrogram.
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")
    # Decode with default options and surface the recognized text.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions())
    print(decoded.text)
    return decoded.text
36
+
37
+
38
+
39
+
40
+
41
def whisper_openai(audio_path):
    """Transcribe *audio_path* with OpenAI's hosted whisper-1 model.

    Returns the Transcription object from the API.  The file is opened in
    a context manager so the handle is closed even when the request fails
    (the committed version leaked the open file object).
    """
    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript
48
+
49
if __name__ == "__main__":
    # Demo entry point.  The committed code called whisper_pipeline() at
    # module level with no argument, which raises TypeError the moment the
    # module is imported; guard it and require a real audio path instead.
    import sys
    if len(sys.argv) > 1:
        print(whisper_pipeline(sys.argv[1]))
src/app/api/module/config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from decouple import config
2
+ import os
3
+
4
# OpenAI API key; defaults to "" so the module can be imported without it.
OPENAI_API_KEY = config('OPENAI_API_KEY', default="")
# Azure OCR key — intentionally no default: fail fast if AZURE is unset.
key = config("AZURE")
# Embedding model name.  NOTE: the misspelled `emmbedding_model` is kept for
# backward compatibility; new code should use the corrected alias below.
emmbedding_model = "text-embedding-3-large"
embedding_model = emmbedding_model

# Working data directory, resolved against the CURRENT working directory.
# NOTE(review): this depends on where the process is launched from — confirm
# callers always start in the repository root.
file_Directory = os.path.join(os.getcwd(), "data")

# Azure Cognitive Services OCR endpoint.
endpoint = "https://bintix-ocr.cognitiveservices.azure.com/"
src/app/api/module/image.ipynb ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "ename": "",
10
+ "evalue": "",
11
+ "output_type": "error",
12
+ "traceback": [
13
+ "\u001b[1;31mRunning cells with 'catlognew' requires the ipykernel package.\n",
14
+ "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
15
+ "\u001b[1;31mCommand: 'conda install -n catlognew ipykernel --update-deps --force-reinstall'"
16
+ ]
17
+ }
18
+ ],
19
+ "source": [
20
+ "import cv2\n",
21
+ "import os\n",
22
+ "import numpy as np \n",
23
+ "from llm_vision import OpenAIVision\n",
24
+ "from ocr import azure_ocr\n",
25
+ "from prompts.base import base_prompt\n",
26
+ "from utils import extract_json_from_text\n",
27
+ "from vectorsearch import search , get_detail_df"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": null,
33
+ "metadata": {},
34
+ "outputs": [
35
+ {
36
+ "ename": "",
37
+ "evalue": "",
38
+ "output_type": "error",
39
+ "traceback": [
40
+ "\u001b[1;31mFailed to start the Kernel. \n",
41
+ "\u001b[1;31mUnable to start Kernel 'catlognew (Python)' due to a connection timeout. \n",
42
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "image_path = r\"data/remove_flash.jpg\""
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 3,
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "details = azure_ocr(image_path)"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 4,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "data": {
66
+ "text/plain": [
67
+ "'BRU\\nNOW 90/- ONLY'"
68
+ ]
69
+ },
70
+ "execution_count": 4,
71
+ "metadata": {},
72
+ "output_type": "execute_result"
73
+ }
74
+ ],
75
+ "source": [
76
+ "details"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 5,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "prompt = base_prompt.format(text = details)"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 6,
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "obj = OpenAIVision()\n",
95
+ "json = obj.get_image_description(image_path,prompt)"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 7,
101
+ "metadata": {},
102
+ "outputs": [
103
+ {
104
+ "data": {
105
+ "text/plain": [
106
+ "'```json\\n{\\n \"brand\": \"BRU\",\\n \"mrp\": \"90/-\",\\n \"unit\": \"null\",\\n \"Quantity\": 1,\\n \"parent_category\": \"BEVERAGES\",\\n \"ingredients\": \"null\",\\n \"calorie_count\": \"null\",\\n \"marketed_by\": \"null\",\\n \"manufactured_by\": \"null\",\\n \"manufactured_in_country\": \"null\",\\n \"type_of_packaging\": \"null\",\\n \"promotion_on_the_pack\": \"NEW 90/- ONLY\",\\n \"type_of_product\": \"Instant Coffee\",\\n \"pack_of_or_no_of_units\": \"null\"\\n}\\n```'"
107
+ ]
108
+ },
109
+ "execution_count": 7,
110
+ "metadata": {},
111
+ "output_type": "execute_result"
112
+ }
113
+ ],
114
+ "source": [
115
+ "json['choices'][0]['message']['content']"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 8,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "response = extract_json_from_text(json['choices'][0]['message']['content'])"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 9,
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "name": "stdout",
134
+ "output_type": "stream",
135
+ "text": [
136
+ "{'brand': 'BRU', 'mrp': '90/-', 'unit': 'null', 'Quantity': 1, 'parent_category': 'BEVERAGES', 'ingredients': 'null', 'calorie_count': 'null', 'marketed_by': 'null', 'manufactured_by': 'null', 'manufactured_in_country': 'null', 'type_of_packaging': 'null', 'promotion_on_the_pack': 'NEW 90/- ONLY', 'type_of_product': 'Instant Coffee', 'pack_of_or_no_of_units': 'null'}\n"
137
+ ]
138
+ }
139
+ ],
140
+ "source": [
141
+ "print(response)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 10,
147
+ "metadata": {},
148
+ "outputs": [
149
+ {
150
+ "ename": "ImportError",
151
+ "evalue": "Could not import chromadb python package. Please install it with `pip install chromadb`.",
152
+ "output_type": "error",
153
+ "traceback": [
154
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
155
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
156
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
157
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/__init__.py:5\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclient\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AdminClient \u001b[38;5;28;01mas\u001b[39;00m AdminClientCreator\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mauth\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtoken\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TokenTransportHeader\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
158
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/auth/token/__init__.py:26\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m System\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 27\u001b[0m OpenTelemetryGranularity,\n\u001b[1;32m 28\u001b[0m trace_method,\n\u001b[1;32m 29\u001b[0m )\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_class\n",
159
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py:5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, Optional, Sequence, Union\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m trace\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msdk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresources\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SERVICE_NAME, Resource\n",
160
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/trace/__init__.py:87\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeprecated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deprecated\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context \u001b[38;5;28;01mas\u001b[39;00m context_api\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mattributes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BoundedAttributes \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
161
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/context/__init__.py:25\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvironment_variables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OTEL_PYTHON_CONTEXT\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_importlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m entry_points\n\u001b[1;32m 27\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n",
162
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/util/_importlib_metadata.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright The OpenTelemetry Authors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# FIXME: Use importlib.metadata when support for 3.11 is dropped if the rest of\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# the supported versions at that time have the same API.\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mimportlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 18\u001b[0m EntryPoint,\n\u001b[1;32m 19\u001b[0m EntryPoints,\n\u001b[1;32m 20\u001b[0m entry_points,\n\u001b[1;32m 21\u001b[0m version,\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# The importlib-metadata library has introduced breaking changes before to its\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# API, this module is kept just to act as a layer between the\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# importlib-metadata library and our project if in any case it is necessary to\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# do so.\u001b[39;00m\n",
163
+ "\u001b[0;31mImportError\u001b[0m: cannot import name 'EntryPoint' from 'importlib_metadata' (unknown location)",
164
+ "\nDuring handling of the above exception, another exception occurred:\n",
165
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
166
+ "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
167
+ "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
168
+ "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
169
+ "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
170
+ ]
171
+ },
172
+ {
173
+ "ename": "",
174
+ "evalue": "",
175
+ "output_type": "error",
176
+ "traceback": [
177
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
178
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
179
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
180
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
181
+ ]
182
+ }
183
+ ],
184
+ "source": [
185
+ "\n",
186
+ "name = response['brand'] + \" \" + response['type_of_product']\n",
187
+ "name = \"BRU\"\n",
188
+ "get_prod_name_db = search(name)"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": null,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": []
197
+ }
198
+ ],
199
+ "metadata": {
200
+ "kernelspec": {
201
+ "display_name": "catlog",
202
+ "language": "python",
203
+ "name": "python3"
204
+ },
205
+ "language_info": {
206
+ "codemirror_mode": {
207
+ "name": "ipython",
208
+ "version": 3
209
+ },
210
+ "file_extension": ".py",
211
+ "mimetype": "text/x-python",
212
+ "name": "python",
213
+ "nbconvert_exporter": "python",
214
+ "pygments_lexer": "ipython3",
215
+ "version": "3.10.0"
216
+ }
217
+ },
218
+ "nbformat": 4,
219
+ "nbformat_minor": 2
220
+ }
src/app/api/module/image_enhance.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ from config import file_Directory
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
class Image_Enhance():
    """OpenCV enhancement passes for product photos.

    Each method reads an image from disk, applies one transform, writes the
    result under `file_Directory`, and (where useful) returns the new path.
    """

    def __init__(self, image_path) -> None:
        # Path of the source image every pipeline starts from.
        self.image_path = image_path

    def brightness_Adjust(self):
        """Brighten / contrast-stretch the source image; return output path."""
        image = cv2.imread(self.image_path)
        # Gain (contrast) and bias (brightness) for convertScaleAbs.
        # Bug fix: the committed alpha was -1.1; convertScaleAbs computes
        # |alpha*pixel + beta|, so a negative gain inverts the image,
        # contradicting the stated intent of a brightness adjustment.
        alpha = 1.1
        beta = 70
        image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
        imagepth = os.path.join(file_Directory, 'Brightness & contrast.jpg')
        cv2.imwrite(imagepth, image2)
        return imagepth

    def remove_flash(self, imagepth):
        """Mask out flashlight glare while keeping text regions."""
        image = cv2.imread(imagepth)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Segment text with adaptive thresholding.
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
        # Blur to suppress noise before glare detection.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        # Very bright pixels (>240) are treated as glare.
        _, mask = cv2.threshold(blurred, 240, 255, cv2.THRESH_BINARY_INV)
        # Combine the text and non-glare masks.
        mask = cv2.bitwise_or(mask, thresh)
        # Morphological closing removes small residual glare specks.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        result = cv2.bitwise_and(image, image, mask=mask)
        cv2.imwrite(os.path.join(file_Directory, 'remove_flash.jpg'), result)

    def sharpen(self, imagepth):
        """Apply a 3x3 sharpening kernel; return the output path."""
        image = cv2.imread(imagepth)
        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
        sharpened_image = cv2.filter2D(image, -1, kernel)
        imagepath = os.path.join(file_Directory, 'sharpened_image.jpg')
        cv2.imwrite(imagepath, sharpened_image)
        return imagepath

    def lapacian_sharpen(self, imagepth):
        """Write the Laplacian (edge emphasis) of the image to disk."""
        image = cv2.imread(imagepth)
        sharpened_image2 = cv2.Laplacian(image, cv2.CV_64F)
        imagepath = os.path.join(file_Directory, 'Laplacian_sharpened_image.jpg')
        cv2.imwrite(imagepath, sharpened_image2)

    def removing_noise(self, imagepth):
        """Median-filter the image to remove salt-and-pepper noise."""
        image = cv2.imread(imagepth)
        # Bug fix: a kernel size of 1 makes medianBlur a no-op; 3 is the
        # smallest aperture that actually filters.
        filtered_image = cv2.medianBlur(image, 3)
        imagepath = os.path.join(file_Directory, 'Median Blur.jpg')
        cv2.imwrite(imagepath, filtered_image)
        return imagepath

    def enhance_color(self, imagepth):
        """Rebalance hue/saturation/value and write the recolored image."""
        image = cv2.imread(imagepth)
        # Bug fix: cv2.imread returns BGR, so the forward conversion must be
        # BGR2HSV — the committed RGB2HSV swapped red/blue (the reverse
        # conversion below was already HSV2BGR, confirming the mismatch).
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # Work in float and clip so the saturation boost cannot wrap around
        # uint8 (e.g. 200 * 1.5 would overflow and alias to a small value).
        hsv = image.astype(np.float32)
        hsv[:, :, 0] = hsv[:, :, 0] * 0.7   # hue
        hsv[:, :, 1] = hsv[:, :, 1] * 1.5   # saturation
        hsv[:, :, 2] = hsv[:, :, 2] * 0.5   # value
        image = np.clip(hsv, 0, 255).astype(np.uint8)
        image2 = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        imagepath = os.path.join(file_Directory, 'enhanced coloured.jpg')
        cv2.imwrite(imagepath, image2)
103
+
104
+
105
if __name__ == "__main__":
    # Demo pipeline — guarded so importing this module no longer runs a full
    # enhancement pass with hard-coded paths (the committed code executed
    # all of this at import time).
    obj = Image_Enhance(r"data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png")
    pth = obj.brightness_Adjust()
    sharpen = obj.sharpen(pth)
    lapacian_sharpen = obj.lapacian_sharpen(sharpen)
    noise = obj.removing_noise(sharpen)
    obj.enhance_color(noise)
    obj.remove_flash(sharpen)
src/app/api/module/llm_vision.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import requests
3
+ from config import OPENAI_API_KEY
4
+ import os
5
+
6
+
7
+ """
8
+ openai_vision = OpenAIVision(api_key)
9
+ image_path = "path_to_your_image.jpg"
10
+ prompt = ""
11
+ response = openai_vision.get_image_description(prompt,image_path)
12
+ """
13
+
14
+ class OpenAIVision:
15
+ def __init__(self):
16
+ self.api_key = OPENAI_API_KEY
17
+ self.base_url = "https://api.openai.com/v1/chat/completions"
18
+
19
+ def __encode_image(self, image_path):
20
+ with open(image_path, "rb") as image_file:
21
+ return base64.b64encode(image_file.read()).decode('utf-8')
22
+
23
+ def get_image_description(self, image_path, prompt):
24
+ base64_image = self.__encode_image(image_path)
25
+
26
+ headers = {
27
+ "Content-Type": "application/json",
28
+ "Authorization": f"Bearer {self.api_key}"
29
+ }
30
+
31
+ payload = {
32
+ "model": "gpt-4-vision-preview",
33
+ "temperature": 0.0,
34
+ "messages": [
35
+ {
36
+ "role": "user",
37
+ "content": [
38
+ {
39
+ "type": "text",
40
+ "text": prompt,
41
+ },
42
+ {
43
+ "type": "image_url",
44
+ "image_url": {
45
+ "url": f"data:image/jpeg;base64,{base64_image}"
46
+ }
47
+ }
48
+
49
+ ]
50
+
51
+ }
52
+ ],
53
+ "max_tokens": 1000,
54
+
55
+ }
56
+
57
+ response = requests.post(self.base_url, headers=headers, json=payload)
58
+ return response.json()
src/app/api/module/ocr.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from azure.ai.formrecognizer import DocumentAnalysisClient
2
+ from azure.core.credentials import AzureKeyCredential
3
+ from config import key, endpoint
4
+ import easyocr
5
+
6
def azure_ocr(image_path):
    """Best-effort OCR via Azure Document Intelligence ("prebuilt-read").

    Returns the recognized text content, or "" when the service call fails
    (errors are printed rather than raised).
    """
    try:
        # Client is built per call from the module-level endpoint/key config.
        analysis_client = DocumentAnalysisClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )
        with open(image_path, "rb") as image_file:
            poller = analysis_client.begin_analyze_document(
                "prebuilt-read", document=image_file
            )
            return poller.result().content
    except Exception as e:
        print('Error occurred:', e)
        return ""
23
+
24
def easy_ocr(image_path):
    """Best-effort OCR with EasyOCR across English plus five Indian scripts.

    Returns EasyOCR's raw detection list, or [] when anything fails
    (errors are printed rather than raised).
    """
    try:
        langs = ['en', 'hi', 'bn', 'mr', 'ta', 'te']
        return easyocr.Reader(langs).readtext(image_path)
    except Exception as e:
        print('Error occurred:', e)
        return []
src/app/api/module/product_description.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ from llm_vision import OpenAIVision
6
+ from ocr import azure_ocr
7
+ from prompts.base import base_prompt
8
+ from utils import extract_json_from_text
9
+ from vectorsearch import search , get_detail_df
10
+
11
+
12
+
13
def get_product_description(image_path):
    """OCR the product image, prompt the vision LLM, and parse its JSON reply.

    Returns the extracted product-attribute dict.
    """
    details = azure_ocr(image_path)
    prompt = base_prompt.format(text=details)
    obj = OpenAIVision()
    # Renamed local from `json` — the committed name shadowed the stdlib module.
    api_response = obj.get_image_description(image_path, prompt)
    response = extract_json_from_text(api_response['choices'][0]['message']['content'])
    return response
+
22
+ def add_in_db(response):
23
+ name = response['brand'] + " " + response['type_of_product']
24
+ get_prod_name_db = search(name)
25
+ name = get_detail_df(get_prod_name_db)
26
+ ### Add things into database
src/app/api/module/prompts/base.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from textwrap import dedent

# Prompt template for the product-attribute extraction LLM call.
# `{text}` is the only format placeholder and is filled with the OCR output
# via base_prompt.format(text=...).
# NOTE(review): formatting reconstructed from a diff view — the original
# indentation inside the dedent() literal is not visible here; confirm the
# rendered string against the committed file.
base_prompt = dedent("""
### Instruction:

product description starts here

{text}

product description ends here

this is the categorys list ['BEVERAGES', 'SNACKS & BRANDED FOODS', 'NOT FOUND', 'EGGS, MEAT & FISH', 'FOODGRAINS, OIL & MASALA', 'PERSONAL CARE', 'CLEANING & HOUSEHOLD', 'FRUITS & VEGETABLES', 'BAKERY, CAKES & DAIRY', 'MAKEUP', 'BABY CARE', 'PET FOOD & ACCESSORIES', 'NON FMCG', 'ALCOHOL & TOBACCO', 'WELLNESS', 'EVERYDAY MEDICINE-NEW', 'EXCERCISE & FITNESS', 'ALCOHOLIC BEVERAGES'].

Get the text from the product image and the above product description to give me the following details in JSON format:
( return "null" where you don't have a answer)

"brand": "sample_brand",
"mrp": "The price might start with MRP or Rs.",
"unit": "per pack",
"Quantity": 1, ##num of products visible
"parent_category": "from the above given list",
"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
"calorie_count": "Would be in numbers",
"marketed_by": "sample_marketer",
"manufactured_by": "sample_manufacturer",
"manufactured_in_country": "Country XYZ",
"type_of_packaging": "Box",
"promotion_on_the_pack": "if any",
"type_of_product": "give this your understanding",
"pack_of_or_no_of_units": "No. of Units"


Analyse data from the above product description to give me the following details in JSON format:
Only return the output in the required json format.
""")
src/app/api/module/utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
# with open('Category-tree.json') as f:
#     cat_data = json.load(f)
# NOTE(review): `cat_data` is referenced by get_childs()/get_inner_child()
# below, but its load above is commented out — those helpers raise NameError
# until this is restored.

# Closed set of top-level catalog categories used for classification.
candidate_labels = ['BEVERAGES', 'SNACKS & BRANDED FOODS', 'NOT FOUND', 'EGGS, MEAT & FISH',
                    'FOODGRAINS, OIL & MASALA', 'PERSONAL CARE', 'CLEANING & HOUSEHOLD',
                    'FRUITS & VEGETABLES', 'BAKERY, CAKES & DAIRY', 'MAKEUP', 'BABY CARE',
                    'PET FOOD & ACCESSORIES', 'NON FMCG', 'TOBACCO', 'WELLNESS', 'ALCOHOLIC BEVERAGES']
10
+
11
def get_childs(parent):
    # Return the names of the immediate children of *parent* in the category
    # tree.  NOTE(review): depends on module-level `cat_data`, whose JSON
    # load is commented out above — as committed, calling this raises
    # NameError.
    catagories = []
    for category in cat_data:
        if category['name'] == parent:
            for child in category['children']:
                catagories.append(child['name'])
    return catagories

def get_inner_child(to_find_parent,to_find_child):
    # Return the grandchildren names under parent -> child in the category
    # tree.  Same NameError caveat as get_childs(): `cat_data` is undefined
    # as committed.
    catagories = []
    for parent in cat_data:
        if parent['name'] == to_find_parent:
            for child in parent['children']:
                if child['name'] == to_find_child:
                    for inner_child in child['children']:
                        catagories.append(inner_child['name'])
    return catagories
28
+
29
+
30
+
31
def extract_json_from_text(text):
    """Extract and parse the outermost {...} JSON object embedded in *text*.

    Returns the parsed object, or None when no valid JSON object is found.
    (The committed version returned the exception object itself on failure,
    which callers then treated as a dict.)
    """
    text = str(text)
    try:
        # Slice from the first '{' to the last '}' and parse that span.
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        return json.loads(text[start_index:end_index].strip())
    except (ValueError, TypeError) as e:
        # \033[31m ... \033[0m renders red; the committed reset code was
        # missing its escape character ("[0m" printed literally).
        print(f"\033[31m Exception occurred while loading JSON: {e} \033[0m")
        return None
src/app/api/module/vectorsearch.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from config import OPENAI_API_KEY, file_Directory
3
+ from langchain_community.document_loaders.csv_loader import CSVLoader
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain_community.vectorstores import Chroma
7
+ import pandas as pd
8
+
9
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY  # langchain's OpenAI clients read the key from the environment
10
+
11
+
12
+ # df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
13
+ # df_new = pd.DataFrame(columns=["id", "name"])
14
+ # df_new = df['name']
15
+ # df_new.to_csv(r"data/data.csv", index=False)
16
+
17
def create_vector():
    """Build (or rebuild) the persisted Chroma vector store from data/data.csv.

    Loads catalog rows, chunks them, embeds with OpenAI, and persists the
    store under <file_Directory>/vectorstore.
    """
    loader = CSVLoader(file_path="data/data.csv")
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(docs)
    db_path = os.path.join(file_Directory, "vectorstore")
    embeddings = OpenAIEmbeddings()
    os.makedirs(db_path, exist_ok=True)
    # Bug fix: the committed code embedded the raw `docs`, silently
    # discarding the split `documents` it had just computed.
    Chroma.from_documents(documents, embeddings, persist_directory=db_path)
26
+
27
def search(query):
    """Return the page content of the closest vector-store match for *query*.

    Returns None when the store yields no results (the committed version
    raised IndexError on an empty result list).  Reuses a single
    OpenAIEmbeddings instance instead of constructing a second one just to
    embed the query.
    """
    embeddings = OpenAIEmbeddings()
    db_path = os.path.join(file_Directory, "vectorstore")
    db = Chroma(persist_directory=db_path, embedding_function=embeddings)
    embedding_vector = embeddings.embed_query(query)
    docs = db.similarity_search_by_vector(embedding_vector)
    if not docs:
        return None
    print(docs[0].page_content)
    return docs[0].page_content
35
+
36
+
37
def get_detail_df(name, df=None):
    """Return the catalog row whose 'name' column equals *name*, or None.

    Parameters
    ----------
    name : str
        Product name to look up.
    df : pandas.DataFrame, optional
        Catalog table to search; when omitted, the default ONDC sample
        spreadsheet is loaded from its hard-coded path (backward compatible
        with the original single-argument signature).

    Bug fixes: the committed version iterated ``df.iterrows()`` incorrectly
    (each item is an (index, row) tuple, so ``item['name']`` raised) and its
    loop-body ``else: return None`` bailed out after inspecting only the
    first row.
    """
    if df is None:
        df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
    for _, row in df.iterrows():
        if row['name'] == name:
            return row
    return None
44
+
45
+ if __name__ == "__main__":
46
+ create_vector()
47
+ name = search("Choco Creme Wafers")
48
+ print(get_detail_df(name))