Spaces:

JiaxinGe
/

AutoPresent

Running

App Files Files Community

para-lost committed on Nov 30, 2024

Commit

2dd1349

1 Parent(s): 0cde187

add slides lib

Browse files

Files changed (9) hide show

SlidesLib/README.md +46 -0
SlidesLib/__init__.py +191 -0
SlidesLib/image_gen.py +28 -0
SlidesLib/llm.py +34 -0
SlidesLib/plotting.py +56 -0
SlidesLib/ppt_gen.py +173 -0
SlidesLib/search.py +149 -0
SlidesLib/vqa.py +44 -0
requirements.txt +1 -2

SlidesLib/README.md ADDED Viewed

	@@ -0,0 +1,46 @@

+# SlidesLib
+SlidesLib is a Python library for slide generation, providing APIs for image generation, Google search, and slide customization.
+## Features
+- **Image Generation**: Create images using the DALL-E API.
+- **Search Integration**: Perform Google searches, save screenshots, and retrieve images.
+- **Slide Customization**: Add text, bullet points, images, and set slide backgrounds.
+## Installation
+1. **Dependencies**: Install required Python libraries:
+   ```bash
+   pip install -r requirements.txt
+   ```
+2. **Google Chrome**: Required for search functionality:
+   ```bash
+   wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
+   sudo dpkg -i google-chrome-stable_current_amd64.deb
+   sudo apt-get install -f
+   ```
+3. **OpenAI API Key**: Export your API key:
+   ```bash
+   export OPENAI_API_KEY="your_api_key"
+   ```
+## Quick Start
+- **Image Generation**:
+   ```python
+   from SlidesLib.image_gen import Dalle3
+   Dalle3.generate_image("A futuristic cityscape", save_path="cityscape.png")
+   ```
+- **Search Integration**:
+   ```python
+   from SlidesLib.search import GoogleSearch
+   GoogleSearch.search_result("Tallest building in the world", "result.png")
+   ```
+- **Slide Customization**:
+   ```python
+   from SlidesLib import add_title
+   add_title(slide, text="Welcome to SlidesLib")
+   ```
+For more examples, refer to the code in this folder.

SlidesLib/__init__.py ADDED Viewed

	@@ -0,0 +1,191 @@

+from .search import GoogleSearch
+from .vqa import VQA
+from .image_gen import Dalle3
+from .llm import LLM
+from .ppt_gen import SlideAgent
+from pptx.util import Inches, Pt
+from pptx.dml.color import RGBColor
+from pptx.enum.text import MSO_AUTO_SIZE
+from mysearchlib import LLM
+from pptx.dml.color import RGBColor
def search_result(question: str, screenshot_path: str = "screenshot.png") -> str:
    """Look a question up on Google and capture the results page as an image.

    Module-level shortcut that forwards both arguments to
    ``GoogleSearch.search_result``.

    Args:
        question: str, Text to search for
        screenshot_path: str, File the screenshot is written to
    Rets:
        The value returned by ``GoogleSearch.search_result`` (the screenshot path)
    """
    saved_to = GoogleSearch.search_result(question, screenshot_path)
    return saved_to
def search_image(query: str, save_path: str = 'top_images') -> str:
    """Find an image matching ``query`` and store it at ``save_path``.

    Module-level shortcut that forwards both arguments to
    ``GoogleSearch.search_image``.

    Args:
        query: str, Text describing the image to look for
        save_path: str, Destination path for the downloaded image
    Rets:
        The value returned by ``GoogleSearch.search_image`` (the saved path)
    """
    image_path = GoogleSearch.search_image(query, save_path)
    return image_path
def get_answer(question: str) -> str:
    """Ask the LLM a question and hand back its reply.

    Args:
        question: str, Prompt forwarded unchanged to ``LLM.get_answer``
    Rets:
        The answer produced by ``LLM.get_answer``
    """
    answer = LLM.get_answer(question)
    return answer
def get_code(request: str, examples: str = "") -> str:
    """Ask the LLM to generate code for a request.

    Delegates to ``LLM.get_code``, which prepends the code-generation
    instruction and the few-shot examples to the request. (The previous call
    to ``LLM.get_answer(request, examples)`` passed one argument too many.)

    Args:
        request: str, The task the model should write code for
        examples: str, Optional few-shot code examples for the request
    Rets:
        The text returned by ``LLM.get_code``
    """
    return LLM.get_code(request, examples)
def generate_image(query: str, save_path: str = "downloaded_image.png") -> str:
    """Create an image from a text prompt and store it on disk.

    Module-level shortcut that forwards both arguments to
    ``Dalle3.generate_image``.

    Args:
        query: str, Text prompt describing the desired image
        save_path: str, File the generated image is written to
    Rets:
        The value returned by ``Dalle3.generate_image`` (the saved path)
    """
    image_path = Dalle3.generate_image(query, save_path)
    return image_path
def add_title(
    slide, text: str, font_size: int = 44,
    font_color: tuple[int, int, int] = (0, 0, 0),
    background_color: tuple[int, int, int] = None,
):
    """Write the slide title, styling its font and (optionally) its fill.

    Args:
        slide: Slide object as in pptx library
        text: str, Title text
        font_size: int, Point size applied to every title paragraph, e.g., 44
        font_color: tuple(int,int,int), RGB font color, e.g., (0, 0, 0)
        background_color: Optional, tuple(int,int,int), RGB fill color;
            the fill is left untouched when None
    Rets:
        slide: the same Slide object, with the title set
    """
    title = slide.shapes.title
    if title is None:
        # The layout has no title placeholder: use a text box near the top instead.
        left, top, width, height = Inches(1), Inches(0.5), Inches(8), Inches(1)
        title = slide.shapes.add_textbox(left, top, width, height)
    title.text = text
    for para in title.text_frame.paragraphs:
        font = para.font
        font.size = Pt(font_size)
        font.color.rgb = RGBColor(*font_color)
    if background_color is None:
        return slide
    fill = title.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(*background_color)
    return slide
def add_text(
    slide, text: str, coords: list[float],
    font_size: int = 20, bold: bool = False,
    color: tuple[int, int, int] = (0, 0, 0),
    background_color: tuple[int, int, int] = None,
    auto_size: bool = True,
):
    """Add a text box at a specified location with custom text and color settings.

    Args:
        slide: Slide object as in pptx library
        text: str, Text to be added
        coords: list(float), [left, top, width, height] in inches; the height
            is multiplied by the number of "\\n"-separated lines in ``text``
        font_size: int, Font size in int (point size), e.g., 20
        bold: bool, True if bold-type the text, False otherwise
        color: tuple(int,int,int), RGB color, e.g., (0, 0, 0)
        background_color: Optional, tuple(int,int,int), RGB color, e.g., (255, 255, 255)
        auto_size: bool, True if auto-size the text box, False otherwise
    Rets:
        slide: Slide object with the text box added
    """
    left, top, width, height = coords
    text_box = slide.shapes.add_textbox(Inches(left), Inches(top), Inches(width), Inches(height))

    if background_color:
        text_box.fill.solid()
        text_box.fill.fore_color.rgb = RGBColor(*background_color)
    else:
        text_box.fill.background()  # No fill if no color is specified

    # Give multi-line text one "height" per line.
    line_count = len(text.split("\n"))
    text_box.height = Inches(height * line_count)

    text_frame = text_box.text_frame
    text_frame.word_wrap = True
    if auto_size:
        text_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT

    # A new text box already contains one empty paragraph. Write into it
    # instead of appending a second one, which would leave a blank first line.
    p = text_frame.paragraphs[0]
    p.text = text
    p.font.size = Pt(font_size)
    p.font.bold = bold
    p.font.color.rgb = RGBColor(*color)
    return slide
def add_bullet_points(
    slide, bullet_points: list[str], coords: list[float],
    font_size: int = 18, color: tuple[int, int, int] = (0, 0, 0),
    background_color: tuple[int, int, int] = None,
):
    """Add a text box holding one paragraph per bullet point.

    Args:
        slide: Slide object as in pptx library
        bullet_points: list(str), List of texts to be added as bullet points
        coords: list(float), [left, top, width, height] in inches
        font_size: int, Font size in int (point size), e.g., 18
        color: tuple(int,int,int), RGB color, e.g., (0, 0, 0)
        background_color: Optional, tuple(int,int,int), RGB color, e.g., (255, 255, 255)
    Rets:
        slide: Slide object with the bullet points added
    """
    left, top, width, height = coords
    text_box = slide.shapes.add_textbox(Inches(left), Inches(top), Inches(width), Inches(height))

    if background_color:
        text_box.fill.solid()
        text_box.fill.fore_color.rgb = RGBColor(*background_color)
    else:
        text_box.fill.background()  # No fill if no color is specified

    text_frame = text_box.text_frame
    text_frame.word_wrap = True
    text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE

    for index, point in enumerate(bullet_points):
        # The text frame starts with one empty paragraph: the first bullet
        # reuses it so the list does not begin with a blank line.
        p = text_frame.paragraphs[0] if index == 0 else text_frame.add_paragraph()
        p.text = point
        p.font.size = Pt(font_size)
        p.font.color.rgb = RGBColor(*color)
    return slide
def add_image(slide, image_path: str, coords: list[float]):
    """Place the picture stored at ``image_path`` on the slide.

    Args:
        slide: Slide object as in pptx library
        image_path: str, Path to the image file
        coords: list(float), [left, top, width, height] in inches
    Rets:
        slide: the same Slide object, with the picture added
    """
    left, top, width, height = coords
    box = (Inches(left), Inches(top), Inches(width), Inches(height))
    slide.shapes.add_picture(image_path, *box)
    return slide
def set_background_color(slide, color: tuple[int, int, int]):
    """Paint the slide background with a solid RGB color.

    Args:
        slide: Slide object as in pptx library
        color: tuple(int, int, int), RGB color, e.g., (255, 255, 255)
    Returns:
        the same slide object, with its background fill changed
    """
    background_fill = slide.background.fill
    background_fill.solid()
    # python-pptx expects an RGBColor, so build one from the tuple.
    background_fill.fore_color.rgb = RGBColor(*color)
    return slide

SlidesLib/image_gen.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from openai import OpenAI
+import requests
class Dalle3():
    """Text-to-image helper backed by the OpenAI ``dall-e-3`` model."""

    @classmethod
    def __init_dalle__(cls):
        """Create and return a fresh ``OpenAI`` client with default settings."""
        client = OpenAI()
        return client

    @classmethod
    def generate_image(cls, query: str, save_path: str = "downloaded_image.png"):
        """Generate an image based on a text query, save the image to the save_path.

        Args:
            query: Prompt sent to the image model.
            save_path: File the downloaded image bytes are written to.

        Returns:
            ``save_path``.

        Raises:
            requests.HTTPError: if downloading the generated image fails with
                an error status (previously the error body was saved as the image).
        """
        client = cls.__init_dalle__()
        generation = client.images.generate(
            model="dall-e-3",
            prompt=query,
            size="1024x1024",
            quality="standard",
            n=1,
        )
        image_url = generation.data[0].url

        # Bound the download time and refuse to store an HTTP error page.
        download = requests.get(image_url, timeout=60)
        download.raise_for_status()

        with open(save_path, "wb") as file:
            file.write(download.content)
        return save_path

SlidesLib/llm.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from openai import OpenAI
+import requests
class LLM():
    """Calls the LLM"""

    # Instruction placed in front of every code-generation request.
    _CODE_PROMPT = "Directly Generate executable python code for the following request:\n"

    @classmethod
    def __init_llm__(cls):
        """Return a fresh ``OpenAI`` client together with the code-generation prompt."""
        client = OpenAI()
        return client, cls._CODE_PROMPT

    @classmethod
    def get_answer(cls, question: str):
        """Calls the LLM by inputing a question,
        then get the response of the LLM as the answer.

        The question is sent as the user message to ``gpt-4o-mini`` after a
        generic "helpful assistant" system message; the content of the first
        choice is returned.
        """
        client, _ = cls.__init_llm__()
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": question}
            ]
        )
        return response.choices[0].message.content

    @classmethod
    def get_code(cls, request:str, examples:str = ""):
        """
        Calls the LLM to generate code for a request.
        request: the task that the model should conduct
        examples: few-shot code examples for the request

        The prompt is the code-generation instruction, then ``examples``, then
        ``request``, concatenated without separators and sent via ``get_answer``.
        """
        # get_answer creates its own client, so none is built here.
        return cls.get_answer(cls._CODE_PROMPT + examples + request)

SlidesLib/plotting.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# Call Matplotlib Library to draw graphs (Bar/Plot...)
+import matplotlib.pyplot as plt
+from llm import *
class Plotting:
    """Matplotlib helpers that render a chart from a dict and save it to a file."""

    def bar_plot(self, data: dict, title: str, xlabel: str, ylabel: str, output_path: str = 'bar_plot.png'):
        """
        Create a bar plot.
        :param data: Dictionary containing data to plot (keys as labels, values as heights).
        :param title: Title of the plot.
        :param xlabel: Label for the X-axis.
        :param ylabel: Label for the Y-axis.
        :param output_path: Path to save the plot image.
        :return: ``output_path``.
        """
        plt.figure(figsize=(10, 6))
        try:
            plt.bar(list(data.keys()), list(data.values()), color='skyblue')
            self._label_and_save(title, xlabel, ylabel, output_path)
        finally:
            # Always release the figure, even if plotting or saving raised.
            plt.close()
        return output_path

    def line_plot(self, data: dict, title: str, xlabel: str, ylabel: str, output_path: str = 'line_plot.png'):
        """
        Create a line plot.
        :param data: Dictionary containing data to plot (keys as x-values, values as y-values).
        :param title: Title of the plot.
        :param xlabel: Label for the X-axis.
        :param ylabel: Label for the Y-axis.
        :param output_path: Path to save the plot image.
        :return: ``output_path``.
        """
        plt.figure(figsize=(10, 6))
        try:
            plt.plot(list(data.keys()), list(data.values()), marker='o', color='skyblue')
            self._label_and_save(title, xlabel, ylabel, output_path, grid=True)
        finally:
            # Always release the figure, even if plotting or saving raised.
            plt.close()
        return output_path

    def get_plot(self, data):
        """Unfinished stub: does nothing with ``data`` yet and returns ``None``."""
        return None

    def _label_and_save(self, title, xlabel, ylabel, output_path, grid=False):
        """Title and label the current pyplot figure, then write it to ``output_path``."""
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if grid:
            plt.grid(True)
        plt.tight_layout()
        plt.savefig(output_path)

SlidesLib/ppt_gen.py ADDED Viewed

	@@ -0,0 +1,173 @@

+from pptx import Presentation
+from pptx.util import Inches as _Inches, Pt as _Pt
+from pptx.dml.color import RGBColor
+from pptx.enum.text import PP_ALIGN, MSO_AUTO_SIZE
+from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE, MSO_SHAPE_TYPE
+from io import BytesIO
# DrawingML element that puts an arrow head on the end point of a line.
# (The earlier value wrapped the XML in literal triple quotes, which made it unparsable.)
ARROW_ADD = '<a:tailEnd type="arrow" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"/>'


class SlideAgent:
    """Stateful builder around a python-pptx ``Presentation``.

    ``add_slide`` creates a slide and makes it the current one; every other
    ``add_*`` / ``set_*`` method draws on that current slide. Unless a method
    says otherwise, positions and sizes are given in inches.
    """

    def __init__(self, slide_width=13.33, slide_height=7.5):
        """Initialize a new presentation with specified slide dimensions in inches."""
        self.prs = Presentation()
        self.prs.slide_width = self._inches(slide_width)
        self.prs.slide_height = self._inches(slide_height)
        self.slide = None  # set by add_slide()

    def _inches(self, val):
        """Helper method to convert to Inches."""
        return _Inches(val)

    def _points(self, val):
        """Helper method to convert to Points."""
        return _Pt(val)

    # ------- Slide APIs -------
    def add_slide(self, layout=0):
        """Create a new slide from layout index ``layout`` and make it current."""
        slide_layout = self.prs.slide_layouts[layout]
        self.slide = self.prs.slides.add_slide(slide_layout)

    # ------- Text APIs -------
    def add_title(self, text, font_size=44, font_color=(0, 0, 0)):
        """Add a title to the slide with a custom font size (in points) and font color (RGB tuple).

        When the slide layout has no title placeholder, a text box near the
        top of the slide is used instead of failing.
        """
        title_shape = self.slide.shapes.title
        if title_shape is None:
            title_shape = self.slide.shapes.add_textbox(
                self._inches(1), self._inches(0.5), self._inches(8), self._inches(1)
            )
        title_shape.text = text
        self._format_text(title_shape.text_frame, self._points(font_size), RGBColor(*font_color))

    def add_text(self, text, top, left, width, height, font_size=20, bold=False, color=(0, 0, 0), background_color=None, auto_size=True):
        """Add a text box at a specified location with custom text settings and optional background color.

        ``height`` is multiplied by the number of "\\n"-separated lines in ``text``.
        """
        text_box = self.slide.shapes.add_textbox(self._inches(left), self._inches(top), self._inches(width), self._inches(height))

        if background_color:
            text_box.fill.solid()
            text_box.fill.fore_color.rgb = RGBColor(*background_color)
        else:
            text_box.fill.background()  # No fill if no color is specified

        line_count = len(text.split("\n"))
        text_box.height = self._inches(height * line_count)

        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        if auto_size:
            text_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
        self._format_paragraph(text_frame, text, self._points(font_size), bold, RGBColor(*color))

    def add_bullet_points(self, bullet_points, top, left, width, height, font_size=18, color=(0, 0, 0)):
        """Add a text box with one paragraph per entry of ``bullet_points``."""
        text_box = self.slide.shapes.add_textbox(self._inches(left), self._inches(top), self._inches(width), self._inches(height))
        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE

        size = self._points(font_size)
        rgb = RGBColor(*color)
        for index, point in enumerate(bullet_points):
            # Reuse the frame's initial empty paragraph for the first bullet.
            p = text_frame.paragraphs[0] if index == 0 else text_frame.add_paragraph()
            p.text = point
            # Format the paragraph directly: _format_text expects a text frame
            # and failed when handed a paragraph.
            p.font.size = size
            p.font.color.rgb = rgb

    # ------- Image APIs -------
    def add_image(self, image_path, top, left, width, height):
        """Add an image at a specified location."""
        self.slide.shapes.add_picture(image_path, self._inches(left), self._inches(top), self._inches(width), self._inches(height))

    def add_image_centered(self, image_path, image_width, image_height):
        """Add an image centered on the slide."""
        slide_width = self.prs.slide_width.inches
        slide_height = self.prs.slide_height.inches
        left = (slide_width - image_width) / 2
        top = (slide_height - image_height) / 2
        self.add_image(image_path, top, left, image_width, image_height)

    # ------- Shape APIs -------
    def add_shape(self, shape_type, top, left, width, height, fill_color=None):
        """Add a shape to the slide, supporting MSO_AUTO_SHAPE_TYPE.

        ``shape_type`` may be an enum member or its name as a string
        (case-insensitive). Raises ValueError for an unknown name.
        """
        if isinstance(shape_type, str):
            try:
                shape_type = getattr(MSO_AUTO_SHAPE_TYPE, shape_type.upper())
            except AttributeError:
                raise ValueError(f"Invalid shape type: {shape_type}. Must be a valid MSO_AUTO_SHAPE_TYPE.")
        shape = self.slide.shapes.add_shape(shape_type, self._inches(left), self._inches(top), self._inches(width), self._inches(height))
        if fill_color:
            shape.fill.solid()
            shape.fill.fore_color.rgb = RGBColor(*fill_color)

    def add_straight_arrow(self, start_x, start_y, end_x, end_y):
        """Draw a straight line from start to end (inches) with an arrow head at the end point."""
        # Local import keeps the module's import block untouched.
        from pptx.oxml import parse_xml

        connector = self._add_straight_connector(start_x, start_y, end_x, end_y)
        line_elem = connector.line._get_or_add_ln()
        line_elem.append(parse_xml(ARROW_ADD))
        return connector

    def add_straight_line(self, start_x, start_y, end_x, end_y):
        """Draw a plain straight line from start to end (inches)."""
        return self._add_straight_connector(start_x, start_y, end_x, end_y)

    # ------- Table APIs -------
    def add_table(self, rows, cols, top, left, width, height, column_widths=None):
        """Add a table to the slide and return the python-pptx table object.

        NOTE(review): ``top``/``left``/``width``/``height`` are handed to
        python-pptx unchanged (not converted from inches like the other
        methods) -- confirm what callers pass. ``column_widths`` is in inches.
        """
        table = self.slide.shapes.add_table(rows, cols, left, top, width, height).table
        if column_widths:
            for idx, col_width in enumerate(column_widths):
                table.columns[idx].width = self._inches(col_width)
        return table

    # ------- Helper APIs -------
    def set_background_color(self, color):
        """Set background color for the current slide.

        ``color`` may be an ``RGBColor`` or a plain ``(r, g, b)`` tuple.
        """
        fill = self.slide.background.fill
        fill.solid()
        fill.fore_color.rgb = color if isinstance(color, RGBColor) else RGBColor(*color)

    def duplicate_slide(self, slide_index):
        """Append a copy of the slide at ``slide_index`` and return the new slide.

        Only pictures and shapes with a text frame are copied (see ``_copy_shape``).
        """
        template_slide = self.prs.slides[slide_index]
        new_slide = self.prs.slides.add_slide(template_slide.slide_layout)
        for shape in template_slide.shapes:
            self._copy_shape(shape, new_slide)
        return new_slide

    def save_presentation(self, file_name):
        """Save the PowerPoint presentation."""
        self.prs.save(file_name)

    # ------- Internal Helper Methods -------
    def _add_straight_connector(self, start_x, start_y, end_x, end_y):
        """Add a straight connector between two points given in inches."""
        # Local import keeps the module's import block untouched.
        from pptx.enum.shapes import MSO_CONNECTOR

        return self.slide.shapes.add_connector(
            MSO_CONNECTOR.STRAIGHT,
            self._inches(start_x), self._inches(start_y),
            self._inches(end_x), self._inches(end_y),
        )

    def _format_paragraph(self, text_frame, text, font_size, bold, color):
        """Write ``text`` into the frame's first paragraph and format it.

        A new text box already holds one empty paragraph; using it avoids the
        blank first line that appending another paragraph would leave.
        """
        p = text_frame.paragraphs[0]
        p.text = text
        p.font.size = font_size
        p.font.bold = bold
        p.font.color.rgb = color

    def _format_text(self, text_frame, font_size, font_color):
        """Apply one font size and color to every paragraph of a text frame."""
        for paragraph in text_frame.paragraphs:
            paragraph.font.size = font_size
            paragraph.font.color.rgb = font_color

    def _copy_shape(self, shape, slide):
        """Copy a picture or text shape onto ``slide``; other shape kinds are skipped."""
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            image = BytesIO(shape.image.blob)
            slide.shapes.add_picture(image, shape.left, shape.top, shape.width, shape.height)
        elif shape.has_text_frame:
            new_shape = slide.shapes.add_textbox(shape.left, shape.top, shape.width, shape.height)
            new_shape.text = shape.text
            source_font = shape.text_frame.paragraphs[0].font
            try:
                rgb = source_font.color.rgb
            except AttributeError:
                # No explicit RGB color on the source text (e.g. inherited or
                # theme color): leave the copy's color alone.
                rgb = None
            for paragraph in new_shape.text_frame.paragraphs:
                paragraph.font.size = source_font.size
                if rgb is not None:
                    paragraph.font.color.rgb = rgb

SlidesLib/search.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from chromedriver_py import binary_path
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import requests
+from selenium.webdriver.common.keys import Keys
+from google_images_download import google_images_download
+from bing_image_downloader import downloader
+import os
+import shutil
class GoogleSearch:
    """Search helpers: Google result screenshots via headless Chrome, images via Bing download."""

    # File-name endings accepted as downloaded images.
    _IMAGE_SUFFIXES = ('jpg', 'jpeg', 'png')

    @classmethod
    def _init_driver(cls):
        """Start a headless Chrome driver and return ``(driver, wait)``."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        service = Service(binary_path)
        driver = webdriver.Chrome(service=service, options=chrome_options)
        wait = WebDriverWait(driver, 100)
        return driver, wait

    @classmethod
    def _list_images(cls, image_dir):
        """Return sorted paths of image files in ``image_dir`` (empty list if it is missing)."""
        if not os.path.isdir(image_dir):
            return []
        return [
            os.path.join(image_dir, name)
            for name in sorted(os.listdir(image_dir))
            if name.lower().endswith(cls._IMAGE_SUFFIXES)
        ]

    @classmethod
    def search_result(cls, question: str, screenshot_path: str = "screenshot.png") -> str:
        """Search a question on Google and return a screenshot of the search result.

        Args:
            question: Text to search for; must be non-empty.
            screenshot_path: File the screenshot is saved to.

        Returns:
            ``screenshot_path``.

        Raises:
            ValueError: if ``question`` is empty.
        """
        from urllib.parse import quote_plus

        # Validate before launching a browser so a bad call costs nothing.
        if not question:
            raise ValueError("Please provide a question")

        driver, _ = cls._init_driver()
        try:
            # Encode the query so spaces, '&', '#', ... do not break the URL.
            driver.get(f"https://www.google.com/search?q={quote_plus(question)}")
            # Give some time for the page to load
            time.sleep(3)
            driver.save_screenshot(screenshot_path)
        finally:
            driver.quit()  # never leave a Chrome process behind
        return screenshot_path

    @classmethod
    def search_image_org(cls, query: str, download_path: str = 'top_image.png') -> str:
        """Search for an image on Google and download the top result.

        Tries each ``<img>`` on the results page in order, skipping the Google
        logo and any source that cannot be downloaded.

        Raises:
            ValueError: if ``query`` is empty.
            Exception: if no image could be downloaded.
        """
        from urllib.parse import quote_plus

        if not query:
            raise ValueError("Please provide a query")

        driver, _ = cls._init_driver()
        try:
            driver.get(f"https://www.google.com/search?tbm=isch&q={quote_plus(query)}")
            for img in driver.find_elements(By.CSS_SELECTOR, "img"):
                src = img.get_attribute("src")
                if not src or "googlelogo" in src:
                    continue
                try:
                    response = requests.get(src, timeout=60)
                    response.raise_for_status()
                    with open(download_path, 'wb') as file:
                        file.write(response.content)
                except (requests.RequestException, OSError):
                    print("Error downloading image, skipping.")
                    continue
                print(src)
                return download_path
        finally:
            driver.quit()
        raise Exception("No valid image found")

    @classmethod
    def search_image_prev(cls, query, output_dir='./downloads', limit=10):
        """Download up to ``limit`` images for ``query`` and return the first one's path.

        Images are stored under ``output_dir/query``.

        Raises:
            FileNotFoundError: if nothing was downloaded.
        """
        # Download images using Bing Image Downloader
        downloader.download(query, limit=limit, output_dir=output_dir, adult_filter_off=True, force_replace=False, timeout=60)
        image_paths = cls._list_images(os.path.join(output_dir, query))
        if not image_paths:
            raise FileNotFoundError(f"No images found for query '{query}' in directory '{output_dir}'")
        return image_paths[0]

    @classmethod
    def search_image(cls, query, save_path):
        """
        Search for an image based on the query and save the result to the specified path.
        Args:
            query (str): The query to search for.
            save_path (str): The path to save the downloaded image.
        Returns:
            str: The path where the image was saved.
        Raises:
            FileNotFoundError: If no image was downloaded for the query.
        """
        import tempfile

        # A private temporary directory avoids clashes between concurrent
        # calls and is removed even when the download fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Download only the top image result
            downloader.download(query, limit=1, output_dir=temp_dir, adult_filter_off=True, force_replace=True, timeout=60)
            image_paths = cls._list_images(os.path.join(temp_dir, query))
            if not image_paths:
                raise FileNotFoundError(f"No images found for query '{query}'.")
            shutil.move(image_paths[0], save_path)
        return save_path

SlidesLib/vqa.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from transformers import BlipProcessor, BlipForQuestionAnswering
+from PIL import Image
+import requests
+import re
class VQA:
    """Visual question answering with the InstructBLIP (Vicuna-7B) checkpoint."""

    def __init__(self, gpu_number=0):
        """Load the model and its processor.

        ``gpu_number`` is accepted but not used: the model is placed with
        ``device_map="auto"``.
        """
        # Imported lazily so importing this module does not need these classes.
        from transformers import InstructBlipForConditionalGeneration, InstructBlipProcessor
        self.model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
        self.processor = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
        self.model.eval()
        self.qa_prompt =  "Question: {} Short answer:"
        self.caption_prompt = "\n<image>\na photo of"
        self.max_words = 50  # questions are cut to this many words

    def pre_question(self, question):
        """Normalise a question: lowercase, drop some punctuation, trim, cap at ``max_words`` words."""
        # from LAVIS blip_processors
        question = re.sub(
            r"([.!\"()*#:;~])",
            "",
            question.lower(),
        )
        question = question.rstrip(" ")
        # truncate question
        question_words = question.split(" ")
        if len(question_words) > self.max_words:
            question = " ".join(question_words[: self.max_words])
        return question

    def qa(self, image_path, question):
        """Answer ``question`` about the image at ``image_path``; returns the decoded text."""
        question = self.pre_question(question)
        # Close the image file once the processor has turned it into tensors.
        with Image.open(image_path) as image:
            inputs = self.processor(images=image, text=question, return_tensors="pt", padding="longest").to(self.model.device)
        generated_ids = self.model.generate(**inputs, length_penalty=-1, num_beams=5, max_length=30, min_length=1,
                                            do_sample=False, top_p=0.9, repetition_penalty=1.0,
                                            num_return_sequences=1, temperature=1)
        generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)
        return generated_text[0]

requirements.txt CHANGED Viewed

@@ -4,5 +4,4 @@ openai
 python-pptx
 numpy
 colormath
-scipy
--e ./SlidesAgent

 python-pptx
 numpy
 colormath
+scipy