Praveen0309 committed on
Commit 0bcbf99
1 Parent(s): 4ad9e0d

Application1

Files changed (2):
  1. requirements.txt +31 -0
  2. try3.py +115 -0
requirements.txt ADDED
@@ -0,0 +1,31 @@
+ accelerate==0.29.3
+ beautifulsoup4==4.12.3
+ bitsandbytes==0.42.0
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ deep-translator==1.11.4
+ filelock==3.14.0
+ fsspec==2024.3.1
+ huggingface-hub==0.22.2
+ idna==3.7
+ Jinja2==3.1.3
+ MarkupSafe==2.1.5
+ mpmath==1.3.0
+ networkx==3.2.1
+ numpy==1.26.4
+ packaging==24.0
+ peft==0.10.0
+ psutil==5.9.8
+ PyYAML==6.0.1
+ regex==2024.4.28
+ requests==2.31.0
+ safetensors==0.4.3
+ scipy==1.13.0
+ soupsieve==2.5
+ sympy==1.12
+ tokenizers==0.19.1
+ torch==2.2.2
+ tqdm==4.66.2
+ transformers==4.40.1
+ typing_extensions==4.11.0
+ urllib3==2.2.1
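
Usage note: the pinned environment above can be recreated with pip install -r requirements.txt (assuming pip) before launching the app.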
try3.py ADDED
@@ -0,0 +1,115 @@
+ # -*- coding: utf-8 -*-
+ """Untitled1.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1vA1O3q8yuzV8Hi3O8LhNuLGWS18yVEkb
+ """
+
+ import streamlit as st
+ import PIL.Image
+ import base64
+ import time
+ import os
+ import torch
+ from transformers import AutoProcessor, LlavaForConditionalGeneration, BitsAndBytesConfig
+ from peft import PeftModel
+ from deep_translator import GoogleTranslator
+
+
+ @st.cache_resource
+ def load_model():
+     model_id = "HuggingFaceH4/vsft-llava-1.5-7b-hf-trl"
+     quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+     base_model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)
+
+     # Load the PEFT LoRA adapter on top of the base model
+     peft_lora_adapter_path = "Praveen0309/llava-1.5-7b-hf-ft-mix-vsft-3"
+     peft_lora_adapter = PeftModel.from_pretrained(base_model, peft_lora_adapter_path, adapter_name="lora_adapter")
+     base_model.load_adapter(peft_lora_adapter_path, adapter_name="lora_adapter")
+
+     processor = AutoProcessor.from_pretrained(model_id)
+
+     return base_model, processor
+
+ base_model, processor = load_model()
+
+ # Translate text from Bengali to English
+ def deep_translator_bn_en(input_sentence):
+     english_translation = GoogleTranslator(source="bn", target="en").translate(input_sentence)
+     return english_translation
+
+ # Translate text from English to Bengali
+ def deep_translator_en_bn(input_sentence):
+     bengali_translation = GoogleTranslator(source="en", target="bn").translate(input_sentence)
+     return bengali_translation
+
+ def inference(image, image_prompt):
+     prompt = f"USER: <image>\n{image_prompt} ASSISTANT:"
+
+     # The processor accepts PIL images; ensure the image is in RGB mode
+     image = image.convert("RGB")
+
+     # Move inputs to the model's device so generation works when the quantized model sits on GPU
+     inputs = processor(text=prompt, images=image, return_tensors="pt").to(base_model.device)
+     generate_ids = base_model.generate(**inputs, max_new_tokens=15)
+     decoded_response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+     return decoded_response
+
+ def image_to_base64(image_path):
+     with open(image_path, 'rb') as img:
+         encoded_string = base64.b64encode(img.read())
+     return encoded_string.decode('utf-8')
+
+ # Take user inputs and display them on the chat UI
+ # (not called in the Streamlit flow below; kept from the original notebook)
+ def query_message(history, txt, img):
+     image_prompt = deep_translator_bn_en(txt)
+     history += [(image_prompt, None)]
+     # Avoid naming this `base64`, which would shadow the stdlib module
+     encoded_image = image_to_base64(img)
+     data_url = f"data:image/jpeg;base64,{encoded_image}"
+     history += [(f"{image_prompt} ![]({data_url})", None)]
+     return history
+
+ # Take user inputs, run inference, and display the response on the chat UI
+ def llm_response(history, text, img):
+     image_prompt = deep_translator_bn_en(text)
+     response = inference(img, image_prompt)
+     assistant_index = response.find("ASSISTANT:")
+     extracted_string = response[assistant_index + len("ASSISTANT:"):].strip()
+     output = deep_translator_en_bn(extracted_string)
+     history += [(text, output)]
+     return history
+
+ # Interface code
+ st.title('My_BoT')
+
+ # Create a sidebar
+ sidebar = st.sidebar
+ sidebar.header('User Inputs')
+
+ # File uploader in the sidebar
+ uploaded_file = sidebar.file_uploader("Upload an Image", type=['png', 'jpg', 'jpeg'])
+
+ # Text input in the sidebar
+ text_input = sidebar.text_input("Enter text and press enter")
+
+ # Initialize session state for history if it doesn't exist
+ if 'history' not in st.session_state:
+     st.session_state.history = []
+
+ # Prompt for an image if text is entered but no image is uploaded
+ if text_input and uploaded_file is None:
+     st.write("Please upload an image.")
+
+ # Submit button in the sidebar
+ submit_button = sidebar.button("Submit")
+
+ # When the button is clicked, generate the response and display the chat history
+ if submit_button:
+     if uploaded_file is not None:
+         image = PIL.Image.open(uploaded_file)
+         st.session_state.history = llm_response(st.session_state.history, text_input, image)
+         for text, output in st.session_state.history:
+             st.write(f"User: {text}")
+             if output is not None:
+                 st.write(f"Assistant: {output}")