Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -12,7 +12,7 @@ app = FastAPI()
|
|
12 |
text_summary = pipeline("summarization", model="Falconsai/text_summarization")
|
13 |
|
14 |
|
15 |
-
def extract_video_id(url)
|
16 |
regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
|
17 |
match = re.search(regex, url)
|
18 |
if match:
|
@@ -31,20 +31,6 @@ def get_youtube_transcript(video_url):
|
|
31 |
except Exception as e:
|
32 |
return f"An error occurred: {e}"
|
33 |
|
34 |
-
def preprocess_text(text):
|
35 |
-
# Split the text into sentences
|
36 |
-
sentences = re.split(r'(?<=[.!?]) +', text)
|
37 |
-
return sentences
|
38 |
-
|
39 |
-
def summarize_text(text):
|
40 |
-
try:
|
41 |
-
# Preprocess the text
|
42 |
-
sentences = preprocess_text(str(text))
|
43 |
-
# Summarize the text
|
44 |
-
summarized_text = text_summary(sentences)
|
45 |
-
return summarized_text[0]['summary_text']
|
46 |
-
except Exception as e:
|
47 |
-
return f"An error occurred: {e}"
|
48 |
|
49 |
# Get the input from the frontend
|
50 |
@app.get("/getdata")
|
@@ -52,8 +38,8 @@ def get_data(input):
|
|
52 |
print(input)
|
53 |
transcript = get_youtube_transcript(input)
|
54 |
if transcript:
|
55 |
-
summary =
|
56 |
-
return {"summary": summary}
|
57 |
else:
|
58 |
return {"error": "Failed to get transcript from the YouTube video."}
|
59 |
|
|
|
12 |
text_summary = pipeline("summarization", model="Falconsai/text_summarization")
|
13 |
|
14 |
|
15 |
+
def extract_video_id(url):
|
16 |
regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
|
17 |
match = re.search(regex, url)
|
18 |
if match:
|
|
|
31 |
except Exception as e:
|
32 |
return f"An error occurred: {e}"
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
# Get the input from the frontend
|
36 |
@app.get("/getdata")
|
|
|
38 |
print(input)
|
39 |
transcript = get_youtube_transcript(input)
|
40 |
if transcript:
|
41 |
+
summary = text_summary(transcript,min_length=10,max_length=1000,do_sample=False)
|
42 |
+
return {"summary": summary[0]['summary_text']}
|
43 |
else:
|
44 |
return {"error": "Failed to get transcript from the YouTube video."}
|
45 |
|