Spaces:

eth-zurich-cle
/

Scideck

Runtime error

File size: 7,750 Bytes

f8ccd9c
edd57c3
9634f2d
d391eaa
9634f2d
 
 
007770e
 
b9923b7
2504cff
 
9634f2d
 
 
 
2504cff
 
9634f2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edd57c3
cab36bf
 
 
 
 
de8d8bf
fd188ea
9634f2d
fd188ea
 
9634f2d
 
 
b9923b7
 
 
 
 
9634f2d
 
 
 
 
 
 
2504cff
9634f2d
 
 
455edde
b9923b7
455edde
 
9652e54
455edde
b9923b7
455edde
 
1f0d187
 
 
 
 
b9923b7
1f0d187
 
 
 
 
 
 
 
 
b9923b7
455edde
 
 
9634f2d
38b10ba
a5926f8
38b10ba
1c7e415
 
bc59001
 
 
 
b9923b7
 
 
d391eaa
2ef4d5b
d391eaa
 
 
9634f2d
 
1c7e415
fd188ea
a5926f8
9634f2d
 
 
 
a5926f8
 
 
 
9634f2d
 
 
8da6c61
9634f2d
8da6c61
 
455edde
5054699
55bc6eb
5054699
 
 
 
15fc5af
770532b
 
 
38b10ba
 
 
 
55bc6eb
38b10ba
15f60af
38b10ba
15f60af
 
5054699
38b10ba
 
 
 
5054699
cab36bf
de8d8bf
cab36bf
455edde
 
15fc5af
cab36bf
15fc5af
 
59fb81e
 
8da6c61
59fb81e
d9efbf0
 
f8ccd9c
fd188ea
1c7e415
b9923b7
 
cab36bf
282a613
fd188ea

from gradio_client import Client
import gradio as gr
import os, uuid, json, random, time
import datetime
from huggingface_hub import hf_api, CommitScheduler, HfApi
from pathlib import Path

# Identifier of the private Space that does the actual slide generation.
# func() queries its runtime, restarts it if needed and calls it through
# gradio_client.
# Previous location, kept for reference:
# deckify_private = "ByMatthew/deckify_private"
deckify_private = "eth-zurich-cle/deckify_private"

# Dataset repository that receives both the uploaded input files (see func)
# and the JSON-lines output log (see scheduler below).
repo_id = "eth-zurich-cle/scideck-dataset"

# Local JSON-lines log that save_output() appends to. The uuid4 suffix gives
# every process start its own file name.
feedback_file = Path("output_data/") / f"output_{uuid.uuid4()}.json"
feedback_folder = feedback_file.parent

# Background uploader for feedback_folder -> "<repo_id>/output_data".
# save_output() takes scheduler.lock while writing to feedback_file.
# NOTE(review): `every=10` is the commit interval; the huggingface_hub docs
# describe it as minutes - confirm against the installed version.
scheduler = CommitScheduler(
    # repo_id="eth-zurich-cle/deckify-dataset",
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=feedback_folder,
    path_in_repo="output_data",
    every=10,
)

# Disabled second scheduler that targeted "input_data" in the old dataset;
# input files are uploaded directly with api.upload_file() in func() instead.
# scheduler = CommitScheduler(
#     repo_id="eth-zurich-cle/deckify-dataset",
#     repo_type="dataset",
#     folder_path=feedback_folder,
#     path_in_repo="input_data",
#     every=10,
# )

# Hub client used by func() to upload the user's input file to repo_id.
api = HfApi()

def check_password(username, password):
    """Return True when *password* matches the ``ACCESS`` environment variable.

    The comparison is done with ``hmac.compare_digest`` so that the time it
    takes does not depend on how many leading characters of the guess are
    correct.

    Args:
        username: Accepted but not used by the check.
        password: The candidate password. Anything that is not a ``str``
            (for example ``None``) is rejected.

    Returns:
        ``True`` if the password matches, ``False`` otherwise.

    Raises:
        KeyError: If the ``ACCESS`` environment variable is not set.
    """
    import hmac  # local import: hmac is not part of the module's import header

    expected = os.environ["ACCESS"]
    if not isinstance(password, str):
        # A non-string value can never equal the configured secret.
        return False
    # compare_digest only accepts non-ASCII text as bytes, so encode both
    # sides; "surrogatepass" keeps odd input from raising instead of failing.
    return hmac.compare_digest(
        password.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    )

def _build_unique_filename(filename, date_string):
    """Return *filename* with ``_<date_string>`` inserted before its extension."""
    # splitext keeps inner dots ("my.paper.pdf" -> "my.paper", ".pdf") and
    # yields an empty extension for names without one.
    stem, extension = os.path.splitext(filename)
    return f"{stem}_{date_string}{extension}"


def _wait_until_running(space_id, max_retries=20, retry_delay=10):
    """Poll *space_id* until its stage is ``RUNNING``; return whether it got there."""
    for _ in range(max_retries):
        space_runtime = hf_api.get_space_runtime(space_id, token=read_key)
        print(f"Space runtime: {space_runtime}")
        if space_runtime.stage == "RUNNING":
            return True
        time.sleep(retry_delay)
    return False


def func(file, number_of_pages, secret):
    """Generate LaTeX Beamer slides for an uploaded file via the private Space.

    Steps: check the password, upload the input file to the dataset repo,
    make sure the private Space is running (restarting and polling it if
    not), call it through ``gradio_client``, log the result with
    ``save_output`` and return the generated text with a header prepended.

    Args:
        file: Path of the uploaded file (a path-like string).
        number_of_pages: Page count forwarded unchanged to the private Space.
        secret: Password typed by the user; must equal ``os.environ["ACCESS"]``.

    Returns:
        The generated slide source prefixed with a SCIDECK header, or a
        human-readable message when the password is wrong, no file was
        uploaded, the Space did not start in time, or the Space returned
        text containing ``"Error"``.

    Raises:
        KeyError: If the ``ACCESS`` environment variable is not set.
    """
    if secret != os.environ["ACCESS"]:
        return "Wrong password, please try again"

    # Without an upload there is nothing to split, upload or convert.
    if not file:
        return "Please upload a PDF file first."

    date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    # Only the base name of the (absolute) upload path is used for the copy
    # stored in the dataset repo.
    directory, filename = os.path.split(file)
    print(f"dir: {directory}, filename: {filename}")

    unique_filename = _build_unique_filename(filename, date_string)
    print(unique_filename)

    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=f"input_files/{unique_filename}",
        repo_id=repo_id,
        repo_type="dataset",
    )

    space_runtime = hf_api.get_space_runtime(deckify_private, token=read_key)
    print(f"Space runtime: {space_runtime}")

    if space_runtime.stage != "RUNNING":  # might need to check lowercase or something
        space_runtime_after_restart = hf_api.restart_space(deckify_private, token=read_key)
        print(f"Space runtime after restart: {space_runtime_after_restart}")

        if not _wait_until_running(deckify_private):
            return "Failed to start the private space in time. Please try again later."

    client = Client(deckify_private, hf_token=read_key)
    print(f"Client: {client}")

    output, latex_output, latex_time, openai_time = client.predict(file, number_of_pages)

    if "Error" in output:
        return output

    # save_output requires the LaTeX text and both timings as well; calling it
    # with only the first four arguments raises TypeError.
    save_output(
        unique_filename,
        output,
        number_of_pages,
        date_string,
        latex_output,
        latex_time,
        openai_time,
    )

    header = "% The following slides are generated with [[SCIDECK]](https://huggingface.co/spaces/eth-zurich-cle/Scideck)"
    header += "\n% Generated on " + date_string
    header += "\n%" + "-"*100 + "\n"
    return header + output

def save_output(unique_filename: str, output: str, num_pages:int, date_string: str, latex_output: str, latex_time: float, openai_time: float) -> None:
    """Append one generation record to the feedback file as a JSON line.

    Args:
        unique_filename: Name under which the input file was stored.
        output: Generated slide text.
        num_pages: Page count that was requested for the run.
        date_string: Timestamp string of the run.
        latex_output: Stored under the ``"latex"`` key.
        latex_time: Stored under the ``"latex_extraction_time"`` key.
        openai_time: Stored under the ``"openai_call_time"`` key.
    """
    record = {
        "input_name": unique_filename,
        "output": output,
        "num_pages": num_pages,
        "timestamp": date_string,
        "latex": latex_output,
        "latex_extraction_time": latex_time,
        "openai_call_time": openai_time,
    }

    # Hold the scheduler's lock for the whole write so concurrent requests do
    # not interleave their lines.
    with scheduler.lock, feedback_file.open("a") as log:
        log.write(json.dumps(record) + "\n")


def upload_file(file):
    """Log the uploaded file object and return its ``name`` attribute."""
    print(file)
    uploaded_name = file.name
    return uploaded_name

# Retired contact paragraph (not part of the rendered description), kept for reference:
# 📝 If you get an error message, you can send me an email with the PDF file attached to this email address: <b>nkoisheke [at] ethz [dot] ch</b>, and I will generate the slides for you. If there are any other issues or questions, please do not hesitate to contact me 🤗 <br>

# Markdown/HTML body shown under the demo image by gr.Markdown(description):
# usage instructions, the feedback form link, the data-storage disclaimer and
# the version tag.
description = r"""
<h3> SCIDECK is a tool that allows you to convert your PDF files into a presentation deck.</h3>
<br>

❗️❗️❗️[<b>Important</b>] Instructions:<br>
1️⃣ <b>Upload the PDF document</b>: Select the PDF file you want to convert into slides.<br>
2️⃣ <b>Specify the number of pages</b>: Indicate the range of pages you'd like to include in the slide generation. <b>Set it to 0</b> if you want to include all pages. <br>
3️⃣ <b>Enter the password provided in the invite email.</b><br>
4️⃣ <b>Click the Generate button</b>: Initiate the slide generation process by clicking the designated "Generate" button.<br>
5️⃣ <b>Be patient 🙂</b>: Generating the slides could take between 1 minute and 5 minutes.<br>
6️⃣ <b>Download the slides</b>: Once the slides are generated, you can download them by clicking the "Copy" button.<br>
7️⃣ <b>Feedback</b>: Please fill out the following [[Feedback Form]](https://docs.google.com/forms/d/e/1FAIpQLScFVZJeNSa9L4t8z5B8whzoLvlNpb95bQdroIPID7aNdv0i4w/viewform?fbzx=-3656849655817576014) <br>

📝 If you have any other issues or questions, please do not hesitate to contact us at ..... 🤗 <br>

Disclaimer: The uploaded files along with the generated outputs will be stored in order to evaluate and improve the service. <br>

Note: If the background process is not running, it may take up to 3 min for it to start. <br>

ver 0.1
"""
# Retired examples section (not part of the rendered description), kept for reference:
# 🖼️ Some examples of slides generated using <b>SCIDECK</b> are shown below: <br>
# 1. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift [[Paper]](https://arxiv.org/pdf/1502.03167.pdf) [[Slides]](https://drive.google.com/file/d/1Zt5FFH0nKxut-LyEr9pNAIdtgR_lBtIj/view?usp=sharing) <br>
# 2. Attention Is All You Need [[Paper]](https://arxiv.org/pdf/1706.03762.pdf) [[Slides]](https://drive.google.com/file/d/1xKgohh_QKV9pD_XjDuXR566h0VJ1S7WI/view?usp=sharing) <br>
# 3. Denoising Diffusion Probabilistic Models [[Paper]](https://arxiv.org/pdf/2006.11239.pdf) [[Slides]](https://drive.google.com/file/d/1D2ZfoJpHR3kP0JdsYyjxUq-vjVMV-KTO/view?usp=sharing) <br>

# Hugging Face token that func() passes to the Space runtime/restart calls and
# to the gradio client; None when HF_TOKEN is not set.
read_key = os.environ.get("HF_TOKEN", None)

if __name__ == "__main__":
    # client = Client.duplicate("ByMatthew/deckify_private", hf_token=read_key)

    title_html = "<h1> SCIDECK: Generate slides (LaTeX Beamer) from PDF</h1>"
    with gr.Blocks() as demo:
        gr.Markdown(title_html)
        gr.Image("demo.png", width=600, show_download_button=False, show_label=False)
        gr.Markdown(description)

        # The upload button holds the file; file_output only displays it.
        file_output = gr.File()
        # Restrict the picker to PDFs: that is the only input the service
        # advertises, and the button label already says so.
        upload_button = gr.UploadButton("Click to Upload a PDF File", file_types=[".pdf"], file_count="single", size="sm")
        upload_button.upload(upload_file, upload_button, file_output)

        # Default to 0 ("all pages", per the instructions) so an untouched
        # field never sends None; precision=0 makes the value an int.
        number_of_pages = gr.Number(label="Number of pages", value=0, precision=0)
        secret = gr.Textbox(label="Password", type="password")
        output = gr.Textbox(label="Output", show_copy_button=True, interactive=False)
        generate_slides_btn = gr.Button("Generate slides")
        # api_name keeps its historical spelling: it is the public endpoint
        # name, and renaming it would break existing API clients.
        generate_slides_btn.click(fn=func, inputs=[upload_button, number_of_pages, secret], outputs=output, api_name="genereate_slides")

    demo.queue(max_size=30)
    demo.launch()