pranked03 committed
Commit
4289c7d
1 Parent(s): 87bc22d

new changes

.DS_Store ADDED
Binary file (6.15 kB).
 
Responses/amazon.json ADDED
The diff for this file is too large to render.
 
Responses/facebook.json ADDED
The diff for this file is too large to render.
 
Responses/google.json ADDED
The diff for this file is too large to render.
 
Responses/semrush.json ADDED
The diff for this file is too large to render.
 
Responses/upthrust.json ADDED
The diff for this file is too large to render.
 
__pycache__/ask_questions.cpython-311.pyc ADDED
Binary file (3.83 kB).
 
__pycache__/pagespeed.cpython-311.pyc ADDED
Binary file (4.38 kB).
 
app.py ADDED
@@ -0,0 +1,46 @@
+ import streamlit as st
+ from pagespeed import generate_response, process_data
+ from ask_questions import answer_question
+ import pandas as pd
+ import numpy as np
+
+ # Load the precomputed embeddings once at startup
+ df = pd.read_csv('processed/embeddings.csv', index_col=0)
+ df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+
+ # Remember the "Get Report" click across Streamlit reruns
+ if "button" not in st.session_state:
+     st.session_state.button = False
+
+ st.title("PageSpeed Insights")
+
+ # Start the app
+ st.write("Enter a URL to get a PageSpeed Insights report")
+
+ # Get the URL from the user
+ url = st.text_input("URL", "https://www.google.com")
+
+ # If the user clicks the button (or already clicked it on a previous rerun)
+
+ if st.button("Get Report") or st.session_state.button:
+     with st.spinner(text="Collecting data..."):
+         st.session_state.button = True
+         # Get the response
+         data = generate_response(url)
+         # Process the data into a list of issues
+         issues = process_data(data)
+         # Show the data
+
+         # Render each issue title as an st.expander that reveals its description and item, plus a button the user can click to get a suggested fix.
+
+         for index, issue in enumerate(issues):
+             title = issue["title"]
+             desc = issue["description"]
+             item = issue["item"]
+
+             with st.expander(title):
+                 st.write(desc)
+                 st.write(item)
+                 if st.button("Fix Issue", key=index):
+                     question = f"Title: {title}\nDescription: {desc}\nItem: {item}"
+                     st.write(answer_question(df, question=question, debug=False))
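A note on the pattern above: Streamlit reruns the whole script on every widget interaction, so a bare st.button() call returns True only during the single rerun triggered by that click. app.py therefore stores a flag in st.session_state so the report stays visible when a nested "Fix Issue" button triggers another rerun. A minimal sketch of just that pattern (widget labels mirror app.py; launched the usual way with streamlit run):

    import streamlit as st

    # Persist the click: st.button() reverts to False on the next rerun,
    # so a flag in session_state keeps the downstream UI rendered.
    if "button" not in st.session_state:
        st.session_state.button = False

    if st.button("Get Report") or st.session_state.button:
        st.session_state.button = True  # remember the click for later reruns
        st.write("This output survives reruns triggered by other widgets.")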
ask_questions.py ADDED
@@ -0,0 +1,92 @@
+ import requests
+ import re
+ import urllib.request
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from html.parser import HTMLParser
+ from urllib.parse import urlparse
+ import os
+ import pandas as pd
+ import tiktoken
+ import openai
+ import numpy as np
+ from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
+ import streamlit as st
+ openai.api_key = st.secrets["openai_api_key"]
+
+
+ df = pd.read_csv('processed/embeddings.csv', index_col=0)
+ df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+
+ def create_context(
+     question, df, max_len=1800, size="ada"
+ ):
+
+     # Create a context for a question by finding the most similar texts in the dataframe
+
+
+     # Get the embedding for the question
+     q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']
+
+     # Get the cosine distances between the question and the stored embeddings
+     df['distances'] = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')
+
+
+     returns = []
+     cur_len = 0
+
+     # Sort by distance and add the text to the context until the context is too long
+     for i, row in df.sort_values('distances', ascending=True).iterrows():
+
+         # Add the length of the text to the current length
+         cur_len += row['n_tokens'] + 4
+
+         # If the context is too long, break
+         if cur_len > max_len:
+             break
+
+         # Else add it to the text that is being returned
+         returns.append(row["text"])
+
+     # Return the context
+     return "\n\n###\n\n".join(returns)
+
+ def answer_question(
+     df,
+     model="text-davinci-003",
+     question="Am I allowed to publish model outputs to Twitter, without a human review?",
+     max_len=3000,
+     size="ada",
+     debug=False,
+     max_tokens=500,
+     stop_sequence=None
+ ):
+     # Answer a question based on the most similar context from the dataframe texts
+     context = create_context(
+         question,
+         df,
+         max_len=max_len,
+         size=size,
+     )
+     # If debug, print the retrieved context
+     if debug:
+         print("Context:\n" + context)
+         print("\n\n")
+
+     try:
+         # Create a completion using the question and context
+         response = openai.Completion.create(
+             prompt=f"You are an SEO analyzer. \nYou will be given:\n An issue, \nthe description of the issue\nthe items that cause the issue.\n Describe the Issue. Show the appropriate solution to the issue. Implement the solution to the issue and show the fix on the given items in the issue. Give only the fix to the item given in the issue. \nContext: {context}\n\n---\n\nIssue: {question}",
+             temperature=0,
+             max_tokens=max_tokens,
+             top_p=1,
+             frequency_penalty=0,
+             presence_penalty=0,
+             stop=stop_sequence,
+             model=model
+         )
+         return response["choices"][0]["text"].strip()
+     except Exception as e:
+         print(e)
+         return ""
+
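create_context above ranks every precomputed text chunk by cosine distance to the question embedding, then concatenates the closest chunks until the n_tokens budget (max_len) is exhausted. A minimal sketch of that ranking in plain numpy, with toy vectors invented for illustration (the real code obtains embeddings from text-embedding-ada-002 and delegates the distance computation to openai.embeddings_utils.distances_from_embeddings):

    import numpy as np

    def cosine_distances(q, rows):
        # 1 - cosine similarity, matching distance_metric='cosine' above
        sims = rows @ q / (np.linalg.norm(rows, axis=1) * np.linalg.norm(q))
        return 1.0 - sims

    q_emb = np.array([0.1, 0.9, 0.0])            # stand-in question embedding
    chunks = ["chunk A", "chunk B", "chunk C"]   # stand-in df["text"] values
    embs = np.array([[0.1, 0.8, 0.1],
                     [0.9, 0.1, 0.0],
                     [0.2, 0.7, 0.1]])           # stand-in df["embeddings"]

    order = np.argsort(cosine_distances(q_emb, embs))  # closest chunks first
    print("\n\n###\n\n".join(chunks[i] for i in order))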
pagespeed.py ADDED
@@ -0,0 +1,74 @@
+ import requests
+ import json
+ import os
+ import re
+ import pandas as pd
+ from ask_questions import answer_question
+ import numpy as np
+ import streamlit as st
+
+ df = pd.DataFrame()
+
+ def extract_url_from_string(string):
+     return re.search(r"(?P<url>https?://[^\s]+)", string).group("url")
+
+ def process_data(data):
+     audits = [data["lighthouseResult"]["audits"][i] for i in data["lighthouseResult"]["audits"]]
+     audits_names = [i["title"] for i in audits]
+
+     scoresdisplays = [data["lighthouseResult"]["audits"][i]["scoreDisplayMode"] for i in data["lighthouseResult"]["audits"]]
+
+     df = pd.read_csv('processed/embeddings.csv', index_col=0)
+     df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+     issues = []
+     for i in audits:
+         if i["scoreDisplayMode"] != "notApplicable" and (i["score"] != 1 and i["score"] is not None) and "details" in i.keys() and i["scoreDisplayMode"] != "informative":
+             title = i["title"]
+             desc = i["description"]
+             item = i["details"]["items"][0]
+             typeOfIssue = i["details"]["type"]
+             dicto = {"title": title, "description": desc, "item": item, "type": typeOfIssue}
+             issues.append(dicto)
+             print(title)
+             print(i["details"]["type"])
+             question = f"Title: {title}\nDescription: {desc}\nItem: {item}"
+             #print(answer_question(df, question=question, debug=False))
+             print("***********************************")
+     return issues
+
+
+ def generate_response(website_url, url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed", api_key=st.secrets["page_speed_api_key"]):
+     print("Website: " + website_url)
+     print()
+     name = website_url.split("//")[1].split(".")[1] # Site name; assumes a "www."-prefixed URL
+
+     params = {
+         "url": website_url,
+         "key": api_key,
+         "category": ["performance", "accessibility", "best_practices", "seo"]
+     }
+
+     try:
+         #output_file_path = f"Responses/{name}.json"
+         #if not os.path.exists(output_file_path):
+
+         response = requests.get(url, params=params)
+         response.raise_for_status() # Check for any request errors
+
+         data = response.json()
+         """
+         with open(output_file_path, "w") as output_file:
+             json.dump(data, output_file, indent=4)
+         else:
+             with open(output_file_path) as output_file:
+                 data = json.load(output_file)"""
+
+         # Process the data as needed
+         return data
+
+     except requests.exceptions.RequestException as e:
+         print("Error:", e)
+ #for i in list_of_urls:
+ #    data = generate_response(i)
+ #    process_data(data)
+ #https://chat.openai.com/share/71d7a128-b56d-4368-9eee-beda874e4200
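process_data keeps only audits that actually failed: it drops audits whose scoreDisplayMode is "notApplicable" or "informative", audits with a perfect or null score, and audits without a details block. A toy payload showing the shape that filter expects (the audit id and all values below are invented for illustration; only the field names follow the code above):

    sample = {
        "lighthouseResult": {
            "audits": {
                "uses-text-compression": {  # hypothetical audit id
                    "title": "Enable text compression",
                    "description": "Text-based resources should be served compressed.",
                    "score": 0.5,
                    "scoreDisplayMode": "numeric",
                    "details": {
                        "type": "opportunity",
                        "items": [{"url": "https://example.com/app.js"}],
                    },
                }
            }
        }
    }

    # Same filter as the loop in process_data
    for audit in sample["lighthouseResult"]["audits"].values():
        if (audit["scoreDisplayMode"] not in ("notApplicable", "informative")
                and audit["score"] is not None and audit["score"] != 1
                and "details" in audit):
            print(audit["title"], "->", audit["details"]["items"][0])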
processed/embeddings.csv ADDED
The diff for this file is too large to render.
 
processed/scraped.csv ADDED
The diff for this file is too large to render.