import datetime
import json
import os
import random
import time
import uuid
from pathlib import Path

import gradio as gr
from gradio_client import Client
from huggingface_hub import CommitScheduler, HfApi, hf_api

# Private Space that performs the actual PDF -> slides conversion.
deckify_private = "ByMatthew/deckify_private"
# deckify_private = "eth-zurich-cle/deckify_private"

# Dataset repository that receives the uploaded inputs and the logged outputs.
repo_id = "eth-zurich-cle/scideck-dataset"

# Each process appends to its own JSON-lines file (uuid4 in the name), so
# several running instances never write to the same file.
feedback_file = Path("output_data/") / f"output_{uuid.uuid4()}.json"
feedback_folder = feedback_file.parent

# Periodically commits everything under `feedback_folder` to the dataset repo.
# `every` is the commit interval (minutes, per the huggingface_hub docs).
scheduler = CommitScheduler(
    # repo_id="eth-zurich-cle/deckify-dataset",
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=feedback_folder,
    path_in_repo="output_data",
    every=10,
)

# scheduler = CommitScheduler(
#     repo_id="eth-zurich-cle/deckify-dataset",
#     repo_type="dataset",
#     folder_path=feedback_folder,
#     path_in_repo="input_data",
#     every=10,
# )

api = HfApi()


def check_password(username, password):
    """Return True when *password* equals the ``ACCESS`` environment variable.

    ``username`` is accepted but never inspected.

    Raises:
        KeyError: if the ``ACCESS`` environment variable is not set.
    """
    return password == os.environ["ACCESS"]


def _timestamped_name(filename, date_string):
    """Insert ``_<date_string>`` between the stem and the extension of *filename*."""
    # splitext keeps inner dots in the stem ("a.b.pdf" -> "a.b", ".pdf") and
    # yields an empty extension for names without one, unlike split(".").
    stem, extension = os.path.splitext(filename)
    return f"{stem}_{date_string}{extension}"


def _ensure_space_running(max_retries=20, retry_delay=10):
    """Make sure the private Space is running, restarting it if necessary.

    Polls the Space stage up to *max_retries* times, sleeping *retry_delay*
    seconds after each unsuccessful poll.

    Returns:
        True once the stage is ``"RUNNING"``, False if it never got there.
    """
    # `read_key` is assigned at module level further down the file; it is
    # looked up when this function is called, not when it is defined.
    space_runtime = hf_api.get_space_runtime(deckify_private, token=read_key)
    print(f"Space runtime: {space_runtime}")
    # might need to check lowercase or something
    if space_runtime.stage == "RUNNING":
        return True

    space_runtime_after_restart = hf_api.restart_space(deckify_private, token=read_key)
    print(f"Space runtime after restart: {space_runtime_after_restart}")

    for _ in range(max_retries):
        space_runtime = hf_api.get_space_runtime(deckify_private, token=read_key)
        print(f"Space runtime: {space_runtime}")
        if space_runtime.stage == "RUNNING":
            return True
        time.sleep(retry_delay)
    return False


def func(file, number_of_pages, secret) -> str:
    """Generate slides for an uploaded file via the private Space.

    Steps: check the password, upload the input file to the dataset repo under
    a timestamped name, make sure the private Space is running, call it, log
    the result through ``save_output`` and return the generated text prefixed
    with a short attribution header.

    Args:
        file: path of the uploaded file.
        number_of_pages: value forwarded unchanged to the private Space.
        secret: password typed by the user; compared with ``ACCESS``.

    Returns:
        The generated output with the header prepended, or a plain message
        when the password is wrong, no file was supplied, the Space did not
        start in time, or the Space's output contains ``"Error"``.

    Raises:
        KeyError: if the ``ACCESS`` environment variable is not set.
    """
    if secret != os.environ["ACCESS"]:
        return "Wrong password, please try again"
    if not file:
        # Nothing was uploaded; os.path.split(None) would raise a TypeError.
        return "No file uploaded, please upload a PDF file and try again"

    date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    # use only the filename from an absolute path
    directory, filename = os.path.split(file)
    print(f"dir: {directory}, filename: {filename}")
    unique_filename = _timestamped_name(filename, date_string)
    print(unique_filename)

    api.upload_file(
        # path_or_fileobj="/path/to/local/folder/README.md",
        path_or_fileobj=file,
        path_in_repo=f"input_files/{unique_filename}",
        repo_id=repo_id,
        repo_type="dataset",
    )

    if not _ensure_space_running():
        return "Failed to start the private space in time. Please try again later."

    client = Client(deckify_private, hf_token=read_key)
    print(f"Client: {client}")

    # output, parsed_document = client.predict(file, number_of_pages)
    output, latex_output, latex_time, openai_time = client.predict(file, number_of_pages)

    if "Error" in output:
        return output

    # generate a random sequence of numbers
    # s = "".join([str(random.randint(0, 9)) for i in range(10)])
    # with open(f"{s}.tex", "w", encoding="utf-8") as f:
    #     f.write(text)

    # save_output needs the LaTeX text and both timings as well; calling it
    # with only the first four arguments raises a TypeError.
    save_output(
        unique_filename,
        output,
        number_of_pages,
        date_string,
        latex_output,
        latex_time,
        openai_time,
    )

    header = "% The following slides are generated with [[SCIDECK]](https://huggingface.co/spaces/eth-zurich-cle/Scideck)"
    header += "\n% Generated on " + date_string
    header += "\n%" + "-" * 100 + "\n"
    return header + output


def save_output(unique_filename: str, output: str, num_pages: int, date_string: str, latex_output: str, latex_time: float, openai_time: float) -> None:
    """Append one JSON record describing a generation run to ``feedback_file``.

    The write happens under ``scheduler.lock`` so that it cannot interleave
    with a concurrent write from another user or with the scheduler itself.
    """
    record = {
        "input_name": unique_filename,
        "output": output,
        "num_pages": num_pages,
        "timestamp": date_string,
        "latex": latex_output,
        "latex_extraction_time": latex_time,
        "openai_call_time": openai_time,
    }
    with scheduler.lock:
        with feedback_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(record))
            f.write("\n")


def upload_file(file):
    """Print the uploaded file object and return its ``name`` attribute."""
    print(file)
    return file.name


# 📝 If you get an error message, you can send me email with the PDF file attached to this email address: nkoisheke [at] ethz [dot] ch, and I will generate the slides for you. If there are any other issues or questions, please do not hesitate to contact me 🤗
# Markdown text passed to gr.Markdown(description) in the __main__ block:
# a short product blurb, step-by-step usage instructions, the feedback-form
# link, the data-storage disclaimer and the version tag. It is a raw string,
# so backslashes in the text are kept literally.
description = r"""

SCIDECK is a tool that allows you to convert your PDF files into a presentation deck.


❗️❗️❗️[Important] Instructions:
1️⃣ Upload the PDF document: Select the PDF file you want to convert into slides.
2️⃣ Specify the number of pages: Indicate the range of pages you'd like to include in the slide generation. Set it to 0 if you want to include all pages.
3️⃣ Enter the password provided in the invite email.
4️⃣ Click the Generate button: Initiate the slide generation process by clicking the designated "Generate" button.
5️⃣ Be patient 🙂: Generating the slides could take between 1 minute and 5 minutes.
6️⃣ Download the slides: Once the slides are generated, you can download them by clicking the "Copy" button.
7️⃣ Feedback: Please fill out the following [[Feedback Form]](https://docs.google.com/forms/d/e/1FAIpQLScFVZJeNSa9L4t8z5B8whzoLvlNpb95bQdroIPID7aNdv0i4w/viewform?fbzx=-3656849655817576014)
📝 If you have any other issues or questions, please do not hesitate to contact us at ..... 🤗
Disclaimer: The uploaded files along with the generated outputs will be stored in order to evaluate and improve the service.
Note: If the background process is not running, it may take up to 3 min for it to start.
ver 0.1 """ # 🖼️ Some examples of slides generated using SCIDECK are shown below:
# 1. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift [[Paper]](https://arxiv.org/pdf/1502.03167.pdf) [[Slides]](https://drive.google.com/file/d/1Zt5FFH0nKxut-LyEr9pNAIdtgR_lBtIj/view?usp=sharing)
# 2. Attention Is All You Need [[Paper]](https://arxiv.org/pdf/1706.03762.pdf) [[Slides]](https://drive.google.com/file/d/1xKgohh_QKV9pD_XjDuXR566h0VJ1S7WI/view?usp=sharing)
# 3. Denoising Diffusion Probabilistic Models [[Paper]](https://arxiv.org/pdf/2006.11239.pdf) [[Slides]](https://drive.google.com/file/d/1D2ZfoJpHR3kP0JdsYyjxUq-vjVMV-KTO/view?usp=sharing)
# Hugging Face token used for the private Space; None when HF_TOKEN is unset.
read_key = os.environ.get("HF_TOKEN")

if __name__ == "__main__":
    # client = Client.duplicate("ByMatthew/deckify_private", hf_token=read_key)

    # Page title shown above the demo image.
    # NOTE(review): in this copy of the file the literal spans several lines
    # with no visible markup; reproduced here with explicit newlines — confirm
    # whether heading markup was originally present.
    title_markdown = "\n\nSCIDECK: Generate slides (LaTeX Beamer) from PDF\n\n"

    with gr.Blocks() as demo:
        # Static header: title, screenshot, instructions.
        gr.Markdown(title_markdown)
        gr.Image(
            "demo.png",
            width=600,
            show_download_button=False,
            show_label=False,
        )
        gr.Markdown(description)

        # Upload widget; the chosen file's name is echoed into `uploaded_preview`.
        uploaded_preview = gr.File()
        pdf_upload = gr.UploadButton(
            "Click to Upload a PDF File",
            file_types=["file"],
            file_count="single",
            size="sm",
        )
        pdf_upload.upload(upload_file, pdf_upload, uploaded_preview)

        # Generation parameters and result box.
        page_count = gr.Number(label="Number of pages")
        password_box = gr.Textbox(label="Password", type="password")
        result_box = gr.Textbox(
            label="Output",
            show_copy_button=True,
            interactive=False,
        )

        # The api_name spelling is kept as-is: it is the public endpoint name.
        generate_button = gr.Button("Generate slides")
        generate_button.click(
            fn=func,
            inputs=[pdf_upload, page_count, password_box],
            outputs=result_box,
            api_name="genereate_slides",
        )

    # Allow at most 30 queued requests, then start serving.
    demo.queue(max_size=30)
    demo.launch()