Spaces:
Runtime error
Runtime error
Added extraction of labels from image
Browse files- .env.example +2 -1
- .gitignore +2 -1
- code_tasks/custom.py +22 -0
- code_tasks/images_in_url.py +42 -0
- code_tasks/{url_text.py → text_in_url.py} +0 -0
- control_flow/main.py +6 -1
.env.example
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
OPENAI_KEY_PERSONAL=<openai_key>
|
|
|
|
1 |
+
OPENAI_KEY_PERSONAL=<openai_key>
|
2 |
+
GOOGLE_APPLICATION_CREDENTIALS=<gcp_key>
|
.gitignore
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
venv
|
2 |
.vscode
|
3 |
logs
|
4 |
-
.env
|
|
|
|
1 |
venv
|
2 |
.vscode
|
3 |
logs
|
4 |
+
.env
|
5 |
+
google-vision.json
|
code_tasks/custom.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from concurrent.futures import ThreadPoolExecutor
|
2 |
+
from typing import Dict, List
|
3 |
+
|
4 |
+
from google.cloud import vision
|
5 |
+
|
6 |
+
from utils.io import print_system
|
7 |
+
|
8 |
+
|
9 |
+
def get_labels_from_image(url: str) -> List[str]:
    """Ask the Vision label-detection endpoint to describe the image at ``url``.

    Args:
        url: Address of the image; it is handed to the API as the image
            source URI rather than being downloaded here.

    Returns:
        The ``description`` of every label annotation in the response, in
        the order the response lists them.
    """
    annotator = vision.ImageAnnotatorClient()

    # Point the request at the remote image instead of uploading its bytes.
    remote_image = vision.Image()
    remote_image.source.image_uri = url  # type: ignore

    result = annotator.label_detection(image=remote_image)  # type: ignore

    descriptions = []
    for annotation in result.label_annotations:
        descriptions.append(annotation.description)
    return descriptions
|
16 |
+
|
17 |
+
|
18 |
+
def get_labels_from_images(urls) -> Dict:
    """Label several images concurrently and map each URL to its labels.

    Args:
        urls: Iterable of image URLs. ``None`` or an empty iterable is
            accepted and means there is nothing to label.

    Returns:
        A dict mapping every URL to the list returned by
        ``get_labels_from_image`` for it. An empty dict is returned (and no
        progress message is printed) when there are no URLs.
    """
    # Materialise once: the input is needed both for the fan-out and as the
    # dict keys, and it may be a one-shot iterator or None.
    url_list = list(urls) if urls is not None else []
    if not url_list:
        return {}

    print_system("Generating labels for images...")
    with ThreadPoolExecutor() as executor:
        # Executor.map yields results in input order, so zip pairs each URL
        # with its own labels.
        labels = executor.map(get_labels_from_image, url_list)
        return dict(zip(url_list, labels))
|
code_tasks/images_in_url.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
ChatGPT-4 prompt: write a python function that given an url returns all images in the website
|
3 |
+
"""
|
4 |
+
|
5 |
+
import requests
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
+
|
8 |
+
def get_images_from_url(url, timeout=None):
    """Return the URLs of all ``<img>`` sources found on the page at ``url``.

    Args:
        url: Address of the page to fetch.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            The default ``None`` keeps the previous behaviour of not setting
            a timeout.

    Returns:
        A list of image URLs resolved against ``url``, or ``None`` when the
        response status code is not 200.
    """
    # Imported locally so this function does not rely on a change to the
    # module's import block.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)

    # Any status other than 200 is reported to the caller as None.
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # urljoin resolves root-relative ("/a.png"), document-relative ("a.png")
    # and protocol-relative ("//cdn/a.png") sources against the page URL and
    # leaves absolute URLs unchanged; plain concatenation got these wrong.
    # Tags with a missing or empty src are skipped.
    return [
        urljoin(url, image['src'])
        for image in soup.find_all('img')
        if image.get('src')
    ]
|
code_tasks/{url_text.py → text_in_url.py}
RENAMED
File without changes
|
control_flow/main.py
CHANGED
@@ -1,13 +1,18 @@
|
|
1 |
from ai import image
|
2 |
from ai_tasks.best_headlines import get_headlines
|
3 |
from ai_tasks.image_prompt import generate_prompt
|
4 |
-
from code_tasks.
|
|
|
|
|
5 |
from utils.io import print_assistant, print_system, user_input
|
6 |
|
7 |
|
8 |
def run():
|
9 |
url = user_input("URL: ")
|
10 |
text = get_text_from_url(url)
|
|
|
|
|
|
|
11 |
headlines = get_headlines(text)
|
12 |
print_assistant(headlines)
|
13 |
prompt = generate_prompt(text)
|
|
|
1 |
from ai import image
|
2 |
from ai_tasks.best_headlines import get_headlines
|
3 |
from ai_tasks.image_prompt import generate_prompt
|
4 |
+
from code_tasks.custom import get_labels_from_images
|
5 |
+
from code_tasks.images_in_url import get_images_from_url
|
6 |
+
from code_tasks.text_in_url import get_text_from_url
|
7 |
from utils.io import print_assistant, print_system, user_input
|
8 |
|
9 |
|
10 |
def run():
|
11 |
url = user_input("URL: ")
|
12 |
text = get_text_from_url(url)
|
13 |
+
images = get_images_from_url(url)
|
14 |
+
image_labels = get_labels_from_images(images)
|
15 |
+
|
16 |
headlines = get_headlines(text)
|
17 |
print_assistant(headlines)
|
18 |
prompt = generate_prompt(text)
|