{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2111\n", " name class file_name\n", "0 Loperamide Hydrochloride 2 MG Oral Capsule 1052 26211358.jpg\n", "1 Minocycline 50 MG Oral Capsule 1137 36231B28.jpg\n", "2 Nortriptyline 10 MG Oral Capsule 1193 24231228.jpg\n", "3 Nortriptyline 25 MG Oral Capsule 1194 20231018.jpg\n", "4 Nortriptyline 50 MG Oral Capsule 1195 2D2316D8.jpg\n", "... ... ... ...\n", "4327 Enalapril Maleate 2.5 MG Oral Tablet 708 DC4D6E2B.jpg\n", "4328 quinapril 5 MG Oral Tablet 1844 EA507553.jpg\n", "4329 trospium chloride 20 MG Oral Tablet 1959 3C519E2C.jpg\n", "4330 metaxalone 400 MG Oral Tablet 1749 C6506353.jpg\n", "4331 naratriptan 2.5 MG Oral Tablet 1775 66453369.jpg\n", "\n", "[4332 rows x 3 columns]\n", "4332\n" ] } ], "source": [ "df = pd.read_csv('image_class.csv')\n", "df = df[['name', 'class']]\n", "df.drop_duplicates(inplace=True)\n", "print(len(df))\n", "df1 = pd.read_csv('image_class.csv')\n", "df1 = df1[['name', 'class', 'im_file']]\n", "df1['file_name'] = df1['im_file'].apply(lambda v: v.split('_')[-1])\n", "df1.drop(columns='im_file', inplace=True)\n", "df1.drop_duplicates(inplace=True)\n", "print(df1)\n", "print(len(df1))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameclassfile_name
213pregabalin 75 MG Oral Capsule [Lyrica]18315715ABFD.jpg
\n", "
" ], "text/plain": [ " name class file_name\n", "213 pregabalin 75 MG Oral Capsule [Lyrica] 1831 5715ABFD.jpg" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df1[df1.file_name.str.endswith('26211358.jpg')]\n", "df1[df1.file_name.str.endswith('5715ABFD.jpg')]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "'StringMethods' object is not callable", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[8], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# file_name = '00071-1014-68_NLMIMAGE10_5715ABFD.jpeg'\u001b[39;00m\n\u001b[1;32m 2\u001b[0m file_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m23155-0055-19_NLMIMAGE10_66453369.jpg\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 3\u001b[0m df1[\u001b[43mdf1\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfile_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstr\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;241m1\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m==\u001b[39mfile_name]\n", "\u001b[0;31mTypeError\u001b[0m: 'StringMethods' object is not callable" ] } ], "source": [ "# file_name = '00071-1014-68_NLMIMAGE10_5715ABFD.jpeg'\n", "file_name = '23155-0055-19_NLMIMAGE10_66453369.jpg'\n", "df1[df1['file_name'].str().split('_',1)[-1]==file_name]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 [00093-0311-01, RXNAVIMAGE10, 26211358.jpg]\n", "1 [00093-3165-01, 
RXNAVIMAGE10, 36231B28.jpg]\n", "2 [00093-0810-01, RXNAVIMAGE10, 24231228.jpg]\n", "3 [00093-0811-01, RXNAVIMAGE10, 20231018.jpg]\n", "4 [00093-0812-01, RXNAVIMAGE10, 2D2316D8.jpg]\n", " ... \n", "8629 [16714-0442-01, NLMIMAGE10, DC4D6E2B.jpg]\n", "8630 [31722-0267-90, NLMIMAGE10, EA507553.jpg]\n", "8631 [00574-0145-60, NLMIMAGE10, 3C519E2C.jpg]\n", "8632 [64720-0126-10, NLMIMAGE10, C6506353.jpg]\n", "8633 [23155-0055-19, NLMIMAGE10, 66453369.jpg]\n", "Name: file_name, Length: 8634, dtype: object" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split every file_name value on '_' into a list of tokens.\n", "df1['file_name'].str.split('_')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "yolov9", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }