Chaninder Rishi committed on
Commit
a30e7d0
·
1 Parent(s): daa2a54

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fit a log-log linear regression of ad impressions on runtime and spend.

Reads ``emily_election.csv``, derives numeric features from its raw columns,
fits the model on a filtered random sample of the rows and prints the
coefficients, the R-squared score and the residual standard deviation.
"""
import pandas as pd
import numpy as np
import csv
import json
import matplotlib.pyplot as plt
import ast
from sklearn import linear_model


def _sum_dict_literal(text):
    """Parse *text* as a Python dict literal and return the sum of its values."""
    return np.array(list(ast.literal_eval(text).values())).sum()


df = pd.read_csv('emily_election.csv')

# The runtime is the integer that precedes the word 'days' in the raw string.
df['runtime'] = df['cumulative_ad_runtime'].apply(lambda s: int(s.split('days')[0]))
df['impressions'] = df['cumulative_impressions_by_region'].apply(_sum_dict_literal)

# feature 3 (for later)
df['audience_size'] = df['cumulative_est_audience'].apply(_sum_dict_literal)

data = df[['runtime', 'spend', 'audience_size', 'impressions']]

# Random row-wise split: each row lands in `train` with probability 0.8.
# NOTE(review): the split is unseeded, so the printed numbers vary per run.
msk = np.random.rand(len(data)) < 0.8
train = data[msk]
test = data[~msk]

# Take an explicit copy: columns are added below, and assigning into a slice
# of `train` would otherwise trigger pandas' SettingWithCopyWarning.
new_train = train[(train['spend'] > 250) & (train['runtime'] > 4)].copy()

# This model predicts log(impressions) from log(runtime) and log(spend).
regr = linear_model.LinearRegression()
new_train['log_runtime'] = np.log(new_train['runtime'])
new_train['log_spend'] = np.log(new_train['spend'])
new_train['log_impressions'] = np.log(new_train['impressions'])

# log(0) yields -inf; turn infinities into NaN and drop every incomplete row.
new_train = new_train.replace([np.inf, -np.inf], np.nan).dropna()

x = np.asanyarray(new_train[['log_runtime', 'log_spend']])
y = np.asanyarray(new_train[['log_impressions']])
regr.fit(x, y)
# Predict on the same ndarray used for fitting; passing the DataFrame makes
# scikit-learn warn that the estimator was fitted without feature names.
y_pred = regr.predict(x)

# The coefficients
print(regr.coef_)
print('R-squared score: %.2f' % regr.score(x, y))

# Residual standard deviation: sqrt(RSS / (n - p - 1)), where p is the number
# of predictors and the extra 1 accounts for the fitted intercept.
dof = len(y) - (x.shape[1] + 1)
# np.sum reduces to a true scalar; the builtin sum() would leave a 1-element
# array whose implicit conversion to float is deprecated by NumPy.
rss = float(np.sum((y - y_pred) ** 2))
resid_std = np.sqrt(rss / dof) if dof > 0 else float('nan')
print('Standard Deviation: %.2f' % resid_std)