Claudz163 committed on
Commit
f9d8d7e
1 Parent(s): 321df2e

added main

Browse files
Files changed (1) hide show
  1. app.py +58 -45
app.py CHANGED
@@ -5,49 +5,62 @@ from PIL import Image
5
  import os
6
 
7
 
8
- api_key = os.getenv("HUGGINGFACE_TOKEN")
9
- client = InferenceClient(api_key=api_key)
10
-
11
- st.header("Character Captions (IN PROGRESS!)")
12
- st.write("Have a character caption any image you upload!")
13
- character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible"])
14
-
15
- uploaded_img = st.file_uploader("Upload an image")
16
-
17
- if uploaded_img is not None:
18
-
19
- image = Image.open(uploaded_img)
20
- st.image(image)
21
-
22
- image_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
23
-
24
- response = image_captioner(image)
25
- caption = response[0]['generated_text']
26
-
27
- character_prompts = {
28
- "rapper": f"Describe this scene like you're a rapper: {caption}.",
29
- "shrek": f"Describe this scene like you're Shrek: {caption}.",
30
- "unintelligible": f"Describe this scene in a way that makes no sense: {caption}."
31
- }
32
-
33
- prompt = character_prompts[character]
34
-
35
- messages = [
36
- { "role": "user", "content": prompt }
37
- ]
38
-
39
- stream = client.chat.completions.create(
40
- model="meta-llama/Llama-3.2-3B-Instruct",
41
- messages=messages,
42
- max_tokens=500,
43
- stream=True
44
- )
45
-
46
- response = ''
47
- for chunk in stream:
48
- response += chunk.choices[0].delta.content
49
-
50
- st.write(response)
51
-
52
-
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
5
  import os
6
 
7
 
8
def initialize():
    """Set up the Hugging Face Inference client in Streamlit session state.

    Runs the expensive setup only once per browser session; subsequent
    reruns of the script bail out early via the guard clause.
    """
    if 'initialized' in st.session_state:
        return  # already set up for this session
    print("Initializing...")
    token = os.getenv("HUGGINGFACE_TOKEN")
    st.session_state['initialized'] = True
    st.session_state['api_key'] = token
    st.session_state['client'] = InferenceClient(api_key=token)
def main():
    """Streamlit app: caption an uploaded image, then rewrite the caption
    in the voice of a user-selected character via a streamed LLM reply.

    Side effects only (renders UI); no return value.
    """
    initialize()
    st.header("Character Captions")
    st.write("Have a character caption any image you upload!")
    character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible", "cookie monster"])

    uploaded_img = st.file_uploader("Upload an image")

    if uploaded_img is not None:
        # Open and display the uploaded image
        image = Image.open(uploaded_img)
        st.image(image)

        # Load the captioning model once per session -- constructing the
        # pipeline on every Streamlit rerun re-initializes a large model,
        # which is slow and wastes memory. Mirrors how initialize() caches
        # the inference client in session state.
        if 'captioner' not in st.session_state:
            st.session_state['captioner'] = pipeline(
                "image-to-text", model="Salesforce/blip-image-captioning-large"
            )

        # Get a plain-text caption for the image
        response = st.session_state['captioner'](image)
        caption = response[0]['generated_text']

        # Wrap the caption in a character-specific prompt
        character_prompts = {
            "rapper": f"Describe this caption like you're a rapper: {caption}.",
            "shrek": f"Describe this caption like you're Shrek: {caption}.",
            "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
            "cookie monster": f"Describe this caption like you're cookie monster: {caption}."
        }
        prompt = character_prompts[character]
        messages = [
            {"role": "user", "content": prompt}
        ]

        # Pass to Llama for character output regarding the image caption
        stream = st.session_state['client'].chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        # Accumulate streamed chunks. BUG FIX: the final streamed chunk's
        # delta.content is None, which crashed the old `response += ...`
        # concatenation with a TypeError; guard before appending. Join once
        # at the end instead of repeated string +=.
        parts = []
        for chunk in stream:
            content = chunk.choices[0].delta.content
            if content:
                parts.append(content)
        st.write(''.join(parts))


if __name__ == '__main__':
    main()
66