ammaan committed on
Commit
d951942
1 Parent(s): 45e21b8

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +61 -121
main.py CHANGED
@@ -1,121 +1,61 @@
1
- # from typing import Union
2
- # import re
3
- # from youtube_transcript_api import YouTubeTranscriptApi
4
- # from youtube_transcript_api.formatters import TextFormatter
5
- # import torch
6
- # from transformers import pipeline
7
- # from fastapi import FastAPI
8
- # from fastapi.staticfiles import StaticFiles
9
- # from fastapi.responses import FileResponse
10
-
11
- # app = FastAPI()
12
-
13
- # text_summary = pipeline("summarization", model="Falconsai/text_summarization")
14
-
15
- # @app.get("/getdata")
16
- # def getInput(input: str) -> dict:
17
- # output = text_summary(input)
18
- # return output[0]['summary_text']
19
-
20
- # def extract_video_id(url):
21
- # regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
22
- # match = re.search(regex, url)
23
- # if match:
24
- # return match.group(1)
25
- # return None
26
-
27
-
28
- # def get_youtube_transcript(video_url):
29
- # video_id = extract_video_id(video_url)
30
- # if not video_id:
31
- # return "Video ID could not be extracted."
32
-
33
- # try:
34
- # transcript = YouTubeTranscriptApi.get_transcript(video_id)
35
-
36
- # formatter = TextFormatter()
37
- # text_transcript = formatter.format_transcript(transcript)
38
- # summary_text = summary(text_transcript)
39
- # print(summary_text)
40
-
41
- # return summary_text
42
- # except Exception as e:
43
- # return f"An error occurred: {e}"
44
-
45
-
46
-
47
-
48
- # app.mount('/',StaticFiles(directory="static",html=True),name="static")
49
-
50
- # @app.get('/')
51
- # def index() -> FileResponse:
52
- # return FileResponse('/app/static/index.html',media_type="text/html")
53
-
54
-
55
- # if __name__ == "__main__":
56
- # import uvicorn
57
- # uvicorn.run(app, host="127.0.0.1", port=5050)
58
-
59
-
60
-
61
- from typing import Union
62
- import re
63
- from youtube_transcript_api import YouTubeTranscriptApi
64
- from youtube_transcript_api.formatters import TextFormatter
65
- from transformers import pipeline
66
- from fastapi import FastAPI, Query
67
- from fastapi.staticfiles import StaticFiles
68
- from fastapi.responses import FileResponse
69
-
70
- app = FastAPI()
71
-
72
- text_summary = pipeline("summarization", model="Falconsai/text_summarization", max_length=512)
73
-
74
-
75
- def extract_video_id(url: str) -> Union[str, None]:
76
- regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
77
- match = re.search(regex, url)
78
- if match:
79
- return match.group(1)
80
- return None
81
-
82
- def get_youtube_transcript(video_url: str) -> Union[str, None]:
83
- video_id = extract_video_id(video_url)
84
- if not video_id:
85
- return "Video ID could not be extracted."
86
- try:
87
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
88
- formatter = TextFormatter()
89
- text_transcript = formatter.format_transcript(transcript)
90
- return text_transcript
91
- except Exception as e:
92
- return f"An error occurred: {e}"
93
-
94
- def summarize_text(text: str) -> str:
95
- summarized_text = text_summary(text)
96
- return summarized_text[0]['summary_text']
97
-
98
- # Get the input from the frontend
99
- @app.get("/getdata")
100
- def get_data(input: str = Query(..., title="YouTube Video URL")) -> dict:
101
- print(input)
102
- transcript = get_youtube_transcript(input)
103
- if transcript:
104
- summary = summarize_text(transcript)
105
- return {"summary": summary}
106
- else:
107
- return {"error": "Failed to get transcript from the YouTube video."}
108
-
109
-
110
-
111
- app.mount('/', StaticFiles(directory="static", html=True), name="static")
112
-
113
-
114
- @app.get('/')
115
- def index() -> FileResponse:
116
- return FileResponse('/app/static/index.html', media_type="text/html")
117
-
118
-
119
- if __name__ == "__main__":
120
- import uvicorn
121
- uvicorn.run(app, host="127.0.0.1", port=5050)
 
1
+ from typing import Union
2
+ import re
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from youtube_transcript_api.formatters import TextFormatter
5
+ from transformers import pipeline
6
+ from fastapi import FastAPI, Query
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.responses import FileResponse
9
+
10
+ app = FastAPI()
11
+
12
+ text_summary = pipeline("summarization", model="Falconsai/text_summarization", max_length=12000)
13
+
14
+
15
+ def extract_video_id(url: str) -> Union[str, None]:
16
+ regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
17
+ match = re.search(regex, url)
18
+ if match:
19
+ return match.group(1)
20
+ return None
21
+
22
+ def get_youtube_transcript(video_url: str) -> Union[str, None]:
23
+ video_id = extract_video_id(video_url)
24
+ if not video_id:
25
+ return "Video ID could not be extracted."
26
+ try:
27
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
28
+ formatter = TextFormatter()
29
+ text_transcript = formatter.format_transcript(transcript)
30
+ return text_transcript
31
+ except Exception as e:
32
+ return f"An error occurred: {e}"
33
+
34
+ def summarize_text(text: str) -> str:
35
+ summarized_text = text_summary(text)
36
+ return summarized_text[0]['summary_text']
37
+
38
+ # Get the input from the frontend
39
+ @app.get("/getdata")
40
+ def get_data(input: str = Query(..., title="YouTube Video URL")) -> dict:
41
+ print(input)
42
+ transcript = get_youtube_transcript(input)
43
+ if transcript:
44
+ summary = summarize_text(transcript)
45
+ return {"summary": summary}
46
+ else:
47
+ return {"error": "Failed to get transcript from the YouTube video."}
48
+
49
+
50
+
51
+ app.mount('/', StaticFiles(directory="static", html=True), name="static")
52
+
53
+
54
+ @app.get('/')
55
+ def index() -> FileResponse:
56
+ return FileResponse('/app/static/index.html', media_type="text/html")
57
+
58
+
59
+ if __name__ == "__main__":
60
+ import uvicorn
61
+ uvicorn.run(app, host="127.0.0.1", port=5050)