Spaces:
Sleeping
Sleeping
File size: 2,328 Bytes
79b94f8 359769b 79b94f8 359769b 79b94f8 359769b 79b94f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import os
from concurrent.futures import ThreadPoolExecutor
from threading import Thread

import nltk
import openai

import models as md
# Fetch the "punkt" sentence-tokenizer data at import time so downstream
# nltk tokenization works; no-op if the data is already present locally.
nltk.download("punkt")
class TextSummarizer:
    """Summarize YouTube video transcripts.

    Per-chunk summaries are produced by a project-local summarization model
    (loaded via ``models.load_summary_model``); ``generate_short_summary``
    then condenses those with the OpenAI chat API.
    """

    def __init__(self, title):
        # `title` must match a key of the chunk library later passed to
        # generate_full_summary — its summary is what that method returns.
        self.title = title
        self.model = "gpt-3.5-turbo"
        # Project-local abstractive summarizer used for the per-chunk pass.
        self.summarizer = md.load_summary_model()
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def generate_short_summary(self, summary_chunks: list) -> list:
        """Condense each pre-summarized chunk into at most two sentences.

        Args:
            summary_chunks: iterable of chunk-summary strings (each is sent
                as one user message to the chat model). NOTE: the original
                annotation said ``dict``, but elements are used directly as
                message content, so a list of strings is the real contract.

        Returns:
            A list with one short-summary string per input chunk, in order.
        """
        PROMPT = """
You are a helpful assistant that summarizes youtube videos.
Someone has already summarized the video to key points.
Summarize the key points in at most two sentences that capture the essence of the passage.
"""
        final_summary = []
        for summary_chunk in summary_chunks:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": PROMPT},
                    {"role": "user", "content": summary_chunk},
                ],
            )
            final_summary.append(response["choices"][0]["message"]["content"])
        return final_summary

    def generate_full_summary(self, text_chunks_lib: dict) -> str:
        """Summarize every chunk list in ``text_chunks_lib`` in parallel.

        Args:
            text_chunks_lib: mapping of title -> list of transcript chunk
                strings.

        Returns:
            The joined chunk summaries for ``self.title``.

        Raises:
            KeyError: if ``self.title`` is not a key of ``text_chunks_lib``.
        """
        sum_dict = {}
        for key, chunks in text_chunks_lib.items():
            # Run the chunk summaries in parallel. Executor.map preserves
            # input order and re-raises any worker exception here, instead
            # of the previous Thread/shared-list pattern, which left None
            # entries on failure and crashed str.join with a TypeError.
            with ThreadPoolExecutor() as pool:
                chunk_summaries = list(
                    pool.map(
                        lambda chunk: md.summarizer_gen(
                            self.summarizer,
                            sequence=chunk,
                            maximum_tokens=500,
                            minimum_tokens=100,
                        ),
                        chunks,
                    )
                )
            sum_dict[key] = ["\n\n".join(chunk_summaries)]
        return sum_dict[self.title][0]
|