ammaan committed on
Commit
f109e0f
1 Parent(s): a26501a

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. main.py +121 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the FastAPI summarizer defined in main.py.
FROM python:3.9

# All following paths are relative to /code inside the image.
WORKDIR /code

# Copy only the dependency list first, ahead of the application source.
# NOTE(review): presumably done so the pip layer is reused when only
# application code changes — confirm.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the rest of the build context (main.py and the other uploaded files)
# into the working directory.
COPY . .

# Start uvicorn serving `app` from main.py on all interfaces, port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from typing import Union
2
+ # import re
3
+ # from youtube_transcript_api import YouTubeTranscriptApi
4
+ # from youtube_transcript_api.formatters import TextFormatter
5
+ # import torch
6
+ # from transformers import pipeline
7
+ # from fastapi import FastAPI
8
+ # from fastapi.staticfiles import StaticFiles
9
+ # from fastapi.responses import FileResponse
10
+
11
+ # app = FastAPI()
12
+
13
+ # text_summary = pipeline("summarization", model="Falconsai/text_summarization")
14
+
15
+ # @app.get("/getdata")
16
+ # def getInput(input: str) -> dict:
17
+ # output = text_summary(input)
18
+ # return output[0]['summary_text']
19
+
20
+ # def extract_video_id(url):
21
+ # regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
22
+ # match = re.search(regex, url)
23
+ # if match:
24
+ # return match.group(1)
25
+ # return None
26
+
27
+
28
+ # def get_youtube_transcript(video_url):
29
+ # video_id = extract_video_id(video_url)
30
+ # if not video_id:
31
+ # return "Video ID could not be extracted."
32
+
33
+ # try:
34
+ # transcript = YouTubeTranscriptApi.get_transcript(video_id)
35
+
36
+ # formatter = TextFormatter()
37
+ # text_transcript = formatter.format_transcript(transcript)
38
+ # summary_text = summary(text_transcript)
39
+ # print(summary_text)
40
+
41
+ # return summary_text
42
+ # except Exception as e:
43
+ # return f"An error occurred: {e}"
44
+
45
+
46
+
47
+
48
+ # app.mount('/',StaticFiles(directory="static",html=True),name="static")
49
+
50
+ # @app.get('/')
51
+ # def index() -> FileResponse:
52
+ # return FileResponse('/app/static/index.html',media_type="text/html")
53
+
54
+
55
+ # if __name__ == "__main__":
56
+ # import uvicorn
57
+ # uvicorn.run(app, host="127.0.0.1", port=5050)
58
+
59
+
60
+
61
+ from typing import Union
62
+ import re
63
+ from youtube_transcript_api import YouTubeTranscriptApi
64
+ from youtube_transcript_api.formatters import TextFormatter
65
+ from transformers import pipeline
66
+ from fastapi import FastAPI, Query
67
+ from fastapi.staticfiles import StaticFiles
68
+ from fastapi.responses import FileResponse
69
+
70
# Application instance; the route handlers and the static mount below are
# registered on it.
app = FastAPI()

# Summarization pipeline, built once at import time and used by
# summarize_text().
text_summary = pipeline(
    task="summarization",
    model="Falconsai/text_summarization",
    max_length=512,
)
73
+
74
+
75
def extract_video_id(url: str) -> Union[str, None]:
    """Extract the 11-character YouTube video ID from a URL.

    Recognised forms are a ``youtube.com`` URL whose query string carries
    ``v=<id>``, ``youtu.be/<id>``, and the ``youtube.com`` path forms
    ``/v/<id>``, ``/e/<id>``, ``/embed/<id>``, ``/shorts/<id>`` and
    ``/live/<id>``.

    Args:
        url: Text expected to contain a YouTube video URL.

    Returns:
        The video ID, or ``None`` when ``url`` is empty or contains no
        recognisable YouTube video URL.
    """
    # Guard empty/None input so re.search is never handed a non-string.
    if not url:
        return None
    regex = (
        r"(?:youtube\.com\/"
        r"(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
        r"|youtu\.be\/)"
        r"([a-zA-Z0-9_-]{11})"
    )
    match = re.search(regex, url)
    return match.group(1) if match else None
81
+
82
def get_youtube_transcript(video_url: str) -> Union[str, None]:
    """Fetch the transcript of a YouTube video as plain text.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript formatted as text, or ``None`` when the video ID
        cannot be extracted or the transcript cannot be retrieved. Failures
        are signalled with ``None`` rather than an error-message string so
        that a caller testing the result for truthiness cannot mistake an
        error message for a transcript.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        print("Video ID could not be extracted.")
        return None
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return TextFormatter().format_transcript(transcript)
    except Exception as e:
        # Any failure while fetching or formatting is reported on stdout and
        # turned into None instead of propagating to the caller.
        print(f"An error occurred: {e}")
        return None
93
+
94
def summarize_text(text: str) -> str:
    """Summarize ``text`` with the module-level ``text_summary`` pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` field of the first result the pipeline returns.
    """
    first_result = text_summary(text)[0]
    return first_result["summary_text"]
97
+
98
+ # Get the input from the frontend
99
@app.get("/getdata")
def get_data(input: str = Query(..., title="YouTube Video URL")) -> dict:
    """Summarize the transcript of the YouTube video given by ``input``.

    Args:
        input: Required query parameter holding the YouTube video URL.

    Returns:
        ``{"summary": ...}`` when a transcript was obtained, otherwise
        ``{"error": ...}``.
    """
    print(input)
    transcript = get_youtube_transcript(input)
    # Guard clause: bail out early when no transcript came back.
    if not transcript:
        return {"error": "Failed to get transcript from the YouTube video."}
    return {"summary": summarize_text(transcript)}
108
+
109
+
110
+
111
# Serve the contents of the "static" directory from the site root.
# NOTE(review): a mount at "/" appears to match every path, so routes
# registered after this line may never be reached — confirm.
static_files = StaticFiles(directory="static", html=True)
app.mount("/", static_files, name="static")
112
+
113
+
114
@app.get('/')
def index() -> FileResponse:
    """Serve the front-end landing page.

    Returns:
        A ``FileResponse`` for ``static/index.html`` with media type
        ``text/html``.
    """
    # Use the same working-directory-relative "static" folder as the
    # StaticFiles mount. The former absolute path '/app/static/index.html'
    # did not match the image's WORKDIR (/code).
    return FileResponse('static/index.html', media_type="text/html")
117
+
118
+
119
if __name__ == "__main__":
    # Direct-execution entry point: run the app on the loopback interface,
    # port 5050.
    from uvicorn import run

    run(app, host="127.0.0.1", port=5050)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.74.*
2
+ requests==2.27.*
3
+ sentencepiece==0.1.*
4
+ torch==1.11.*
5
+ transformers==4.*
6
+ uvicorn[standard]==0.17.*
7
+ datasets
8
+ youtube_transcript_api