File size: 2,603 Bytes
5da36ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e42331d
5da36ef
 
 
 
 
 
 
 
 
 
 
 
e42331d
5da36ef
 
 
 
 
 
 
 
 
 
 
3a69ea2
5da36ef
3a69ea2
 
5da36ef
 
3a69ea2
5da36ef
 
 
 
 
3a69ea2
5da36ef
0c25a63
 
 
 
51906e7
ad71124
7338b09
ad71124
 
 
 
 
 
6973620
 
 
 
 
 
 
7338b09
ad71124
f5172c9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import requests
import urllib.parse as urlparse


def get_video_id(url_video):
    """Extract the YouTube video id from a video URL.

    Supports youtu.be short links and the standard, mobile, /embed/,
    /v/ and /shorts/ paths on youtube.com. Returns None when the URL
    is not recognized as a YouTube video link or carries no id.
    """
    parsed = urlparse.urlparse(url_video)
    if parsed.hostname == 'youtu.be':
        # youtu.be/<id> — the id is the path without the leading slash;
        # a bare youtu.be/ yields an empty path, normalized to None
        return parsed.path[1:] or None
    if parsed.hostname in ('www.youtube.com', 'youtube.com', 'm.youtube.com'):
        if parsed.path == '/watch':
            # /watch?v=<id>; guard against a missing "v" query parameter
            # instead of raising KeyError
            query_params = urlparse.parse_qs(parsed.query)
            return query_params["v"][0] if "v" in query_params else None
        if parsed.path.startswith(('/embed/', '/v/', '/shorts/')):
            return parsed.path.split('/')[2]
    return None

def get_comments(api_key, video_id):
    """Fetch up to 100 top-level comment threads for a video.

    Calls the YouTube Data API v3 commentThreads endpoint and returns a
    dict mapping an enumeration index to
    {"text_comment": <single-line text>, "publish_data": <publishedAt>}.
    Returns None when the response has no "items" key (e.g. invalid API
    key, unknown video, or comments disabled).
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    params = {
        "part": "snippet",
        "videoId": video_id,
        "maxResults": 100,
        "key": api_key,
    }
    response = requests.get(endpoint, params=params)
    res = response.json()

    # Guard clause: make the "no items" outcome an explicit None instead
    # of falling off the end of the function.
    if "items" not in res:
        return None

    comments = {}
    for num, item in enumerate(res["items"]):
        snippet = item["snippet"]["topLevelComment"]["snippet"]
        comments[num] = {
            # collapse newlines so every comment is a single line of text
            "text_comment": " ".join(snippet["textOriginal"].splitlines()),
            "publish_data": snippet["publishedAt"],
        }
    return comments
    
def get_sentim(data, headers, url):
    """POST one text to the sentiment endpoint; return (label, score).

    Expects the service to respond with a nested list whose first inner
    element is a dict carrying 'label' and 'score'.
    """
    response = requests.post(url, headers=headers, json=data)
    top_prediction = response.json()[0][0]
    return top_prediction['label'], top_prediction['score']

def pipeline_sentiment(url_video, api_key, headers, url):
    """Run the full sentiment pipeline for one video URL.

    Resolves the video id, downloads its comments, scores every comment
    text, and returns a DataFrame with text_comment, publish_data,
    sentiment and score columns.
    """
    video_id = get_video_id(url_video)
    comments = get_comments(api_key, video_id)
    frame = pd.DataFrame(comments).T

    predictions = [
        get_sentim(text, headers, url) for text in frame["text_comment"]
    ]
    frame[["sentiment", "score"]] = pd.DataFrame(predictions)
    return frame

def pipeline_stats(data):
    """Return the share of each sentiment label as a percentage, 2 dp."""
    shares = data['sentiment'].value_counts(normalize=True)
    return (shares * 100).round(2)

def pipeline_summarize(data, headers, url, length=2000, max_length=35):
    """Summarize a collection of texts via the summarization endpoint.

    Joins all texts into one string, posts it to the service in
    `length`-character chunks, and returns the generated summaries
    concatenated with spaces.
    """
    full_text = " ".join(data)
    summaries = []

    for start in range(0, len(full_text), length):
        chunk = full_text[start:start + length]
        payload = {
            "inputs": chunk,
            "parameters": {"max_length": max_length},
        }
        response = requests.post(url, headers=headers, json=payload)
        summaries.append(response.json()[0]["generated_text"])

    return " ".join(summaries)