File size: 7,750 Bytes
f8ccd9c
edd57c3
9634f2d
d391eaa
9634f2d
 
 
007770e
 
b9923b7
2504cff
 
9634f2d
 
 
 
2504cff
 
9634f2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edd57c3
cab36bf
 
 
 
 
de8d8bf
fd188ea
9634f2d
fd188ea
 
9634f2d
 
 
b9923b7
 
 
 
 
9634f2d
 
 
 
 
 
 
2504cff
9634f2d
 
 
455edde
b9923b7
455edde
 
9652e54
455edde
b9923b7
455edde
 
1f0d187
 
 
 
 
b9923b7
1f0d187
 
 
 
 
 
 
 
 
b9923b7
455edde
 
 
9634f2d
38b10ba
a5926f8
38b10ba
1c7e415
 
bc59001
 
 
 
b9923b7
 
 
d391eaa
2ef4d5b
d391eaa
 
 
9634f2d
 
1c7e415
fd188ea
a5926f8
9634f2d
 
 
 
a5926f8
 
 
 
9634f2d
 
 
8da6c61
9634f2d
8da6c61
 
455edde
5054699
55bc6eb
5054699
 
 
 
15fc5af
770532b
 
 
38b10ba
 
 
 
55bc6eb
38b10ba
15f60af
38b10ba
15f60af
 
5054699
38b10ba
 
 
 
5054699
cab36bf
de8d8bf
cab36bf
455edde
 
15fc5af
cab36bf
15fc5af
 
59fb81e
 
8da6c61
59fb81e
d9efbf0
 
f8ccd9c
fd188ea
1c7e415
b9923b7
 
cab36bf
282a613
fd188ea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from gradio_client import Client
import gradio as gr
import os, uuid, json, random, time
import datetime
from huggingface_hub import hf_api, CommitScheduler, HfApi
from pathlib import Path

# Private Space that performs the actual slide generation.
# deckify_private = "ByMatthew/deckify_private"
deckify_private = "eth-zurich-cle/deckify_private"

# Dataset repository that receives the uploaded inputs and the generation logs.
repo_id = "eth-zurich-cle/scideck-dataset"

# Every process appends to its own uniquely named JSON-lines file, so several
# running instances never write to the same file inside the shared folder.
feedback_folder = Path("output_data")
feedback_file = feedback_folder / f"output_{uuid.uuid4()}.json"

# Background job that periodically commits `feedback_folder` to the dataset
# repository under "output_data"; its `lock` guards writes to the folder.
scheduler = CommitScheduler(
    # repo_id="eth-zurich-cle/deckify-dataset",
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=feedback_folder,
    path_in_repo="output_data",
    every=10,
)

# scheduler = CommitScheduler(
#     repo_id="eth-zurich-cle/deckify-dataset",
#     repo_type="dataset",
#     folder_path=feedback_folder,
#     path_in_repo="input_data",
#     every=10,
# )

# Hub client used for one-off uploads of the input files.
api = HfApi()

def check_password(username, password):
    """Return True when *password* equals the ``ACCESS`` environment variable.

    *username* is accepted but not inspected. A ``KeyError`` is raised if the
    ``ACCESS`` environment variable is not set.
    """
    expected_password = os.environ["ACCESS"]
    return password == expected_password

def _unique_upload_name(file_path, date_string):
    """Return the basename of *file_path* with *date_string* appended to its stem.

    ``os.path.splitext`` splits on the last dot only, so a name with several
    dots ("my.paper.pdf") keeps its full stem and a name without an extension
    does not get a made-up one.
    """
    stem, extension = os.path.splitext(os.path.basename(file_path))
    return f"{stem}_{date_string}{extension}"


def _ensure_space_running(max_retries=20, retry_delay=10):
    """Restart the private Space if it is not running and wait for it.

    Returns True as soon as the Space reports the ``RUNNING`` stage, and False
    if it still has not after *max_retries* polls spaced *retry_delay* seconds
    apart.
    """
    space_runtime = hf_api.get_space_runtime(deckify_private, token=read_key)
    print(f"Space runtime: {space_runtime}")
    if space_runtime.stage == "RUNNING":
        return True

    space_runtime_after_restart = hf_api.restart_space(deckify_private, token=read_key)
    print(f"Space runtime after restart: {space_runtime_after_restart}")

    for _ in range(max_retries):
        space_runtime = hf_api.get_space_runtime(deckify_private, token=read_key)
        print(f"Space runtime: {space_runtime}")
        if space_runtime.stage == "RUNNING":
            return True
        time.sleep(retry_delay)
    return False


def func(file, number_of_pages, secret):
    """Generate slides for an uploaded file through the private Space.

    Args:
        file: Path of the uploaded file (as handed over by the upload button),
            or None when nothing has been uploaded yet.
        number_of_pages: Page limit, forwarded unchanged to the remote endpoint.
        secret: Password typed by the user; must equal the ``ACCESS``
            environment variable.

    Returns:
        The generated output prefixed with a short "generated with SCIDECK"
        comment header, or a human-readable message when the password is
        wrong, no file was uploaded, the private Space could not be started,
        or the remote output contains "Error".

    Raises:
        KeyError: If the ``ACCESS`` environment variable is not set.
    """
    if secret != os.environ["ACCESS"]:
        return "Wrong password, please try again"

    # Without this guard a missing upload crashes in the path handling below.
    if not file:
        return "Please upload a PDF file first."

    date_string = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    # Use only the filename from the absolute path, made unique by a timestamp.
    print(f"dir: {os.path.dirname(file)}, filename: {os.path.basename(file)}")
    unique_filename = _unique_upload_name(file, date_string)
    print(unique_filename)

    # Archive the input in the dataset repository before processing it.
    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=f"input_files/{unique_filename}",
        repo_id=repo_id,
        repo_type="dataset",
    )

    if not _ensure_space_running():
        return "Failed to start the private space in time. Please try again later."

    client = Client(deckify_private, hf_token=read_key)
    print(f"Client: {client}")

    # The remote endpoint is expected to return these four values.
    output, latex_output, latex_time, openai_time = client.predict(file, number_of_pages)

    # Remote failures are reported in-band; pass them through without logging.
    if "Error" in output:
        return output

    # save_output requires all seven fields; passing only four raised TypeError.
    save_output(
        unique_filename,
        output,
        number_of_pages,
        date_string,
        latex_output,
        latex_time,
        openai_time,
    )

    header = "% The following slides are generated with [[SCIDECK]](https://huggingface.co/spaces/eth-zurich-cle/Scideck)"
    header += "\n% Generated on " + date_string
    header += "\n%" + "-" * 100 + "\n"
    return header + output

def save_output(unique_filename: str, output: str, num_pages:int, date_string: str, latex_output: str, latex_time: float, openai_time: float) -> None:
    """Append one generation record as a JSON line to the feedback file.

    The record stores the input name, the generated output, the requested
    number of pages, the timestamp, the extracted LaTeX and the two timing
    values reported by the backend.
    """
    record = {
        "input_name": unique_filename,
        "output": output,
        "num_pages": num_pages,
        "timestamp": date_string,
        "latex": latex_output,
        "latex_extraction_time": latex_time,
        "openai_call_time": openai_time,
    }
    line = json.dumps(record) + "\n"

    # Hold the scheduler's lock while appending so concurrent requests from
    # different users cannot interleave their writes.
    with scheduler.lock, feedback_file.open("a") as sink:
        sink.write(line)


def upload_file(file):
    """Log the uploaded file object and return its ``name`` attribute."""
    print(file)
    uploaded_name = file.name
    return uploaded_name

# 📝 If you get an error message, you can send me email with the PDF file attached to this email address: <b>nkoisheke [at] ethz [dot] ch</b>, and I will generate the slides for you. If there are any other issues or questions, please do not hesitate to contact me 🤗 <br>
# Markdown/HTML shown at the top of the page: usage instructions, feedback
# link and the data-storage disclaimer.
description = r"""
<h3> SCIDECK is a tool that allows you to convert your PDF files into a presentation deck.</h3>
<br>

❗️❗️❗️[<b>Important</b>] Instructions:<br>
1️⃣ <b>Upload the PDF document</b>: Select the PDF file you want to convert into slides.<br>
2️⃣ <b>Specify the number of pages</b>: Indicate the range of pages you'd like to include in the slide generation. <b>Set it to 0</b> if you want to include all pages. <br>
3️⃣ <b>Enter the password provided in the invite email.</b><br>
4️⃣ <b>Click the Generate button</b>: Initiate the slide generation process by clicking the designated "Generate" button.<br>
5️⃣ <b>Be patient 🙂</b>: Generating the slides could take between 1 minute and 5 minutes.<br>
6️⃣ <b>Download the slides</b>: Once the slides are generated, you can download them by clicking the "Copy" button.<br>
7️⃣ <b>Feedback</b>: Please fill out the following [[Feedback Form]](https://docs.google.com/forms/d/e/1FAIpQLScFVZJeNSa9L4t8z5B8whzoLvlNpb95bQdroIPID7aNdv0i4w/viewform?fbzx=-3656849655817576014) <br>

📝 If you have any other issues or questions, please do not hesitate to contact us at ..... 🤗 <br>

Disclaimer: The uploaded files along with the generated outputs will be stored in order to evaluate and improve the service. <br>

Note: If the background process is not running, it may take up to 3 min for it to start. <br>

ver 0.1
"""
# 🖼️ Some examples of slides generated using <b>SCIDECK</b> are shown below: <br>
# 1. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift [[Paper]](https://arxiv.org/pdf/1502.03167.pdf) [[Slides]](https://drive.google.com/file/d/1Zt5FFH0nKxut-LyEr9pNAIdtgR_lBtIj/view?usp=sharing) <br>
# 2. Attention Is All You Need [[Paper]](https://arxiv.org/pdf/1706.03762.pdf) [[Slides]](https://drive.google.com/file/d/1xKgohh_QKV9pD_XjDuXR566h0VJ1S7WI/view?usp=sharing) <br>
# 3. Denoising Diffusion Probabilistic Models [[Paper]](https://arxiv.org/pdf/2006.11239.pdf) [[Slides]](https://drive.google.com/file/d/1D2ZfoJpHR3kP0JdsYyjxUq-vjVMV-KTO/view?usp=sharing) <br>

# Hub token used to query, restart and call the private Space; None when the
# HF_TOKEN environment variable is not set.
read_key = os.getenv("HF_TOKEN")

if __name__ == "__main__":
    # client = Client.duplicate("ByMatthew/deckify_private", hf_token=read_key)

    page_title = "<h1> SCIDECK: Generate slides (LaTeX Beamer) from PDF</h1>"
    with gr.Blocks() as demo:
        # Header: title, teaser image and the usage instructions.
        gr.Markdown(page_title)
        gr.Image("demo.png", width=600, show_download_button=False, show_label=False)
        gr.Markdown(description)

        # Upload row: the button feeds the file display next to it.
        file_output = gr.File()
        upload_button = gr.UploadButton(
            "Click to Upload a PDF File",
            file_types=["file"],
            file_count="single",
            size="sm",
        )
        upload_button.upload(upload_file, upload_button, file_output)

        # Generation form: page limit, password, read-only result box.
        number_of_pages = gr.Number(label="Number of pages")
        secret = gr.Textbox(label="Password", type="password")
        output = gr.Textbox(label="Output", show_copy_button=True, interactive=False)
        generate_button = gr.Button("Generate slides")
        generate_button.click(
            fn=func,
            inputs=[upload_button, number_of_pages, secret],
            outputs=output,
            # The spelling is part of the public API route; keep it as is.
            api_name="genereate_slides",
        )

    # Queue at most 30 pending requests, then start serving.
    demo.queue(max_size=30)
    demo.launch()