lgaleana committed on
Commit
bbf59eb
·
1 Parent(s): 4ef5c66

Added extraction of labels from image

Browse files
.env.example CHANGED
@@ -1 +1,2 @@
1
- OPENAI_KEY_PERSONAL=<openai_key>
 
 
1
+ OPENAI_KEY_PERSONAL=<openai_key>
2
+ GOOGLE_APPLICATION_CREDENTIALS=<gcp_key>
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  venv
2
  .vscode
3
  logs
4
- .env
 
 
1
  venv
2
  .vscode
3
  logs
4
+ .env
5
+ google-vision.json
code_tasks/custom.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from concurrent.futures import ThreadPoolExecutor
2
+ from typing import Dict, List
3
+
4
+ from google.cloud import vision
5
+
6
+ from utils.io import print_system
7
+
8
+
9
def get_labels_from_image(url: str) -> List[str]:
    """Return the label descriptions Google Cloud Vision reports for one image.

    The image is handed to the Vision API by URI (``source.image_uri``); this
    function does not download it itself. A new ``ImageAnnotatorClient`` is
    created on every call.

    Args:
        url: Address of the image to annotate.

    Returns:
        The ``description`` of every entry in the response's
        ``label_annotations``, in the order the API returned them.
    """
    annotator = vision.ImageAnnotatorClient()

    remote_image = vision.Image()
    remote_image.source.image_uri = url  # type: ignore

    result = annotator.label_detection(image=remote_image)  # type: ignore

    descriptions = []
    for annotation in result.label_annotations:
        descriptions.append(annotation.description)
    return descriptions
16
+
17
+
18
def get_labels_from_images(urls) -> Dict[str, List[str]]:
    """Label several images concurrently and map each URL to its labels.

    Args:
        urls: Iterable of image URLs. ``None`` or an empty collection is
            accepted and produces an empty result without doing any work.

    Returns:
        A dict whose keys are the given URLs and whose values are the label
        lists produced by ``get_labels_from_image`` for that URL. Duplicate
        URLs collapse into a single key.
    """
    # A failed page fetch upstream can hand us None; treat "nothing to label"
    # as an empty result instead of crashing inside executor.map().
    if not urls:
        return {}

    # Materialise once so a one-shot iterable can feed both map() and zip().
    urls = list(urls)

    print_system("Generating labels for images...")
    with ThreadPoolExecutor() as executor:
        labels = list(executor.map(get_labels_from_image, urls))

    # executor.map yields results in input order, so zip pairs every URL
    # with its own label list.
    return dict(zip(urls, labels))
code_tasks/images_in_url.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ChatGPT-4 prompt: write a python function that given an url returns all images in the website
3
+ """
4
+
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+
8
def get_images_from_url(url):
    """Return the absolute URLs of the ``<img>`` elements found on a web page.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with one resolved URL per ``<img>`` tag that has a non-empty
        ``src`` attribute, or ``None`` when the response status is not 200.
    """
    # Imported locally so the function stays self-contained.
    from urllib.parse import urljoin

    response = requests.get(url)

    # Keep the existing contract: any non-200 status is reported as None.
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    image_urls = []
    for image in soup.find_all('img'):
        src = image.get('src')
        if not src:
            # Missing or empty src carries no image reference.
            continue
        # urljoin handles root-relative ("/a.png"), path-relative ("a.png"),
        # and protocol-relative ("//cdn/a.png") sources, and leaves absolute
        # URLs untouched. The final response URL is the base so redirects
        # resolve correctly.
        image_urls.append(urljoin(response.url, src))

    return image_urls
code_tasks/{url_text.py → text_in_url.py} RENAMED
File without changes
control_flow/main.py CHANGED
@@ -1,13 +1,18 @@
1
  from ai import image
2
  from ai_tasks.best_headlines import get_headlines
3
  from ai_tasks.image_prompt import generate_prompt
4
- from code_tasks.url_text import get_text_from_url
 
 
5
  from utils.io import print_assistant, print_system, user_input
6
 
7
 
8
  def run():
9
  url = user_input("URL: ")
10
  text = get_text_from_url(url)
 
 
 
11
  headlines = get_headlines(text)
12
  print_assistant(headlines)
13
  prompt = generate_prompt(text)
 
1
  from ai import image
2
  from ai_tasks.best_headlines import get_headlines
3
  from ai_tasks.image_prompt import generate_prompt
4
+ from code_tasks.custom import get_labels_from_images
5
+ from code_tasks.images_in_url import get_images_from_url
6
+ from code_tasks.text_in_url import get_text_from_url
7
  from utils.io import print_assistant, print_system, user_input
8
 
9
 
10
  def run():
11
  url = user_input("URL: ")
12
  text = get_text_from_url(url)
13
+ images = get_images_from_url(url)
14
+ image_labels = get_labels_from_images(images)
15
+
16
  headlines = get_headlines(text)
17
  print_assistant(headlines)
18
  prompt = generate_prompt(text)