Spaces:

einanao
/

cobra

Sleeping

App Files Community

einanao committed on Nov 4, 2022

Commit

799789f

1 Parent(s): 218c37d

cache info rates

Browse files

Files changed (1) hide show

app.py +35 -26

app.py CHANGED Viewed

@@ -168,41 +168,50 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
     with st.spinner("downloading..."):
         name = download(url, YDL_OPTS)
-    assert name.endswith(".m4a")
-    name = name.split(".m4a")[0].split("/")[-1]
-    audio_path = os.path.join(DATA_DIR, "%s.mp3" % name)
-    transcript_path = os.path.join(DATA_DIR, "%s.json" % name)
-    output_path = os.path.join(DATA_DIR, "%s_smooth.mp3" % name)
     with st.spinner("transcribing..."):
         segments = transcribe(audio_path, transcript_path)
-    seg_durations = compute_seg_durations(segments)
     with st.spinner("calculating information density..."):
-        info_densities = compute_info_densities(
-            segments, seg_durations, llm, tokenizer, device
         )
-    total_duration = segments[-1]["end"] - segments[0]["start"]
-    min_sec_leaf = total_duration / max_num_segments
-    smoothed_info_densities = smooth_info_densities(
-        info_densities, seg_durations, max_num_segments, min_sec_leaf
-    )
-    squashed_times, squashed_densities = squash_segs(segments, smoothed_info_densities)
-    squashed_durations = np.array([end - start for start, end in squashed_times])
-    speedups = compute_speedups(squashed_densities)
-    speedups = postprocess_speedups(
-        speedups,
-        speedup_factor,
-        min_speedup,
-        max_speedup,
-        squashed_durations,
-        total_duration,
-    )
     with st.spinner("stitching segments..."):
         cat_clips(squashed_times, speedups, audio_path, output_path)

     with st.spinner("downloading..."):
         name = download(url, YDL_OPTS)
+        assert name.endswith(".m4a")
+        name = name.split(".m4a")[0].split("/")[-1]
+        audio_path = os.path.join(DATA_DIR, "%s.mp3" % name)
+        transcript_path = os.path.join(DATA_DIR, "%s.json" % name)
+        density_path = os.path.join(DATA_DIR, "%s.npy" % name)
+        output_path = os.path.join(DATA_DIR, "%s_smooth.mp3" % name)
     with st.spinner("transcribing..."):
         segments = transcribe(audio_path, transcript_path)
+        seg_durations = compute_seg_durations(segments)
     with st.spinner("calculating information density..."):
+        if os.path.exists(density_path):
+            with open(density_path, "rb") as f:
+                info_densities = np.load(f)
+        else:
+            info_densities = compute_info_densities(
+                segments, seg_durations, llm, tokenizer, device
+            )
+            with open(density_path, "wb") as f:
+                np.save(f, info_densities)
+        total_duration = segments[-1]["end"] - segments[0]["start"]
+        min_sec_leaf = total_duration / max_num_segments
+        smoothed_info_densities = smooth_info_densities(
+            info_densities, seg_durations, max_num_segments, min_sec_leaf
         )
+        squashed_times, squashed_densities = squash_segs(
+            segments, smoothed_info_densities
+        )
+        squashed_durations = np.array([end - start for start, end in squashed_times])
+        speedups = compute_speedups(squashed_densities)
+        speedups = postprocess_speedups(
+            speedups,
+            speedup_factor,
+            min_speedup,
+            max_speedup,
+            squashed_durations,
+            total_duration,
+        )
     with st.spinner("stitching segments..."):
         cat_clips(squashed_times, speedups, audio_path, output_path)