pranked03 committed
Commit
4289c7d
1 Parent(s): 87bc22d

new changes

.DS_Store ADDED
Binary file (6.15 kB).
 
Responses/amazon.json ADDED
The diff for this file is too large to render.
 
Responses/facebook.json ADDED
The diff for this file is too large to render.
 
Responses/google.json ADDED
The diff for this file is too large to render.
 
Responses/semrush.json ADDED
The diff for this file is too large to render.
 
Responses/upthrust.json ADDED
The diff for this file is too large to render.
 
__pycache__/ask_questions.cpython-311.pyc ADDED
Binary file (3.83 kB).
 
__pycache__/pagespeed.cpython-311.pyc ADDED
Binary file (4.38 kB).
 
app.py ADDED
@@ -0,0 +1,46 @@
+ import streamlit as st
+ from pagespeed import generate_response, process_data
+ from ask_questions import answer_question
+ import pandas as pd
+ import numpy as np
+
+ # Load the precomputed embeddings once at startup
+ df = pd.read_csv('processed/embeddings.csv', index_col=0)
+ df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+
+ # Remember the "Get Report" click across Streamlit reruns
+ if "button" not in st.session_state:
+     st.session_state.button = False
+
+ st.title("PageSpeed Insights")
+
+ # Start the app
+ st.write("Enter a URL to get a PageSpeed Insights report")
+
+ # Get the URL from the user
+ url = st.text_input("URL", "https://www.google.com")
+
+ # If the user clicks the button (or already clicked it on a previous rerun)
+
+ if st.button("Get Report") or st.session_state.button:
+     with st.spinner(text="Collecting data..."):
+         st.session_state.button = True
+         # Get the response
+         data = generate_response(url)
+         # Process the data into a list of issues
+         issues = process_data(data)
+         # Show the data
+
+         # Render each issue title as an st.expander that reveals its description and item, plus a button the user can click to get a suggested fix.
+
+         for index, issue in enumerate(issues):
+             title = issue["title"]
+             desc = issue["description"]
+             item = issue["item"]
+
+             with st.expander(title):
+                 st.write(desc)
+                 st.write(item)
+                 if st.button("Fix Issue", key=index):
+                     question = f"Title: {title}\nDescription: {desc}\nItem: {item}"
+                     st.write(answer_question(df, question=question, debug=False))
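A note on the pattern above: Streamlit reruns the whole script on every widget interaction, so a bare st.button() call returns True only during the single rerun triggered by that click. app.py therefore stores a flag in st.session_state so the report stays visible when a nested "Fix Issue" button triggers another rerun. A minimal sketch of just that pattern (widget labels mirror app.py; launched the usual way with streamlit run):

    import streamlit as st

    # Persist the click: st.button() reverts to False on the next rerun,
    # so a flag in session_state keeps the downstream UI rendered.
    if "button" not in st.session_state:
        st.session_state.button = False

    if st.button("Get Report") or st.session_state.button:
        st.session_state.button = True  # remember the click for later reruns
        st.write("This output survives reruns triggered by other widgets.")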
ask_questions.py ADDED
@@ -0,0 +1,92 @@
+ import requests
+ import re
+ import urllib.request
+ from bs4 import BeautifulSoup
+ from collections import deque
+ from html.parser import HTMLParser
+ from urllib.parse import urlparse
+ import os
+ import pandas as pd
+ import tiktoken
+ import openai
+ import numpy as np
+ from openai.embeddings_utils import distances_from_embeddings, cosine_similarity
+ import streamlit as st
+ openai.api_key = st.secrets["openai_api_key"]
+
+
+ df = pd.read_csv('processed/embeddings.csv', index_col=0)
+ df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+
+ def create_context(
+     question, df, max_len=1800, size="ada"
+ ):
+
+     # Create a context for a question by finding the most similar texts in the dataframe
+
+
+     # Get the embedding for the question
+     q_embeddings = openai.Embedding.create(input=question, engine='text-embedding-ada-002')['data'][0]['embedding']
+
+     # Get the cosine distances between the question and the stored embeddings
+     df['distances'] = distances_from_embeddings(q_embeddings, df['embeddings'].values, distance_metric='cosine')
+
+
+     returns = []
+     cur_len = 0
+
+     # Sort by distance and add the text to the context until the context is too long
+     for i, row in df.sort_values('distances', ascending=True).iterrows():
+
+         # Add the length of the text to the current length
+         cur_len += row['n_tokens'] + 4
+
+         # If the context is too long, break
+         if cur_len > max_len:
+             break
+
+         # Else add it to the text that is being returned
+         returns.append(row["text"])
+
+     # Return the context
+     return "\n\n###\n\n".join(returns)
+
+ def answer_question(
+     df,
+     model="text-davinci-003",
+     question="Am I allowed to publish model outputs to Twitter, without a human review?",
+     max_len=3000,
+     size="ada",
+     debug=False,
+     max_tokens=500,
+     stop_sequence=None
+ ):
+     # Answer a question based on the most similar context from the dataframe texts
+     context = create_context(
+         question,
+         df,
+         max_len=max_len,
+         size=size,
+     )
+     # If debug, print the retrieved context
+     if debug:
+         print("Context:\n" + context)
+         print("\n\n")
+
+     try:
+         # Create a completion using the question and context
+         response = openai.Completion.create(
+             prompt=f"You are an SEO analyzer. \nYou will be given:\n An issue, \nthe description of the issue\nthe items that cause the issue.\n Describe the Issue. Show the appropriate solution to the issue. Implement the solution to the issue and show the fix on the given items in the issue. Give only the fix to the item given in the issue. \nContext: {context}\n\n---\n\nIssue: {question}",
+             temperature=0,
+             max_tokens=max_tokens,
+             top_p=1,
+             frequency_penalty=0,
+             presence_penalty=0,
+             stop=stop_sequence,
+             model=model
+         )
+         return response["choices"][0]["text"].strip()
+     except Exception as e:
+         print(e)
+         return ""
+
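create_context above ranks every precomputed text chunk by cosine distance to the question embedding, then concatenates the closest chunks until the n_tokens budget (max_len) is exhausted. A minimal sketch of that ranking in plain numpy, with toy vectors invented for illustration (the real code obtains embeddings from text-embedding-ada-002 and delegates the distance computation to openai.embeddings_utils.distances_from_embeddings):

    import numpy as np

    def cosine_distances(q, rows):
        # 1 - cosine similarity, matching distance_metric='cosine' above
        sims = rows @ q / (np.linalg.norm(rows, axis=1) * np.linalg.norm(q))
        return 1.0 - sims

    q_emb = np.array([0.1, 0.9, 0.0])            # stand-in question embedding
    chunks = ["chunk A", "chunk B", "chunk C"]   # stand-in df["text"] values
    embs = np.array([[0.1, 0.8, 0.1],
                     [0.9, 0.1, 0.0],
                     [0.2, 0.7, 0.1]])           # stand-in df["embeddings"]

    order = np.argsort(cosine_distances(q_emb, embs))  # closest chunks first
    print("\n\n###\n\n".join(chunks[i] for i in order))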
pagespeed.py ADDED
@@ -0,0 +1,74 @@
+ import requests
+ import json
+ import os
+ import re
+ import pandas as pd
+ from ask_questions import answer_question
+ import numpy as np
+ import streamlit as st
+
+ df = pd.DataFrame()
+
+ def extract_url_from_string(string):
+     return re.search(r"(?P<url>https?://[^\s]+)", string).group("url")
+
+ def process_data(data):
+     audits = [data["lighthouseResult"]["audits"][i] for i in data["lighthouseResult"]["audits"]]
+     audits_names = [i["title"] for i in audits]
+
+     scoresdisplays = [data["lighthouseResult"]["audits"][i]["scoreDisplayMode"] for i in data["lighthouseResult"]["audits"]]
+
+     df = pd.read_csv('processed/embeddings.csv', index_col=0)
+     df['embeddings'] = df['embeddings'].apply(eval).apply(np.array)
+     issues = []
+     for i in audits:
+         if i["scoreDisplayMode"] != "notApplicable" and (i["score"] != 1 and i["score"] is not None) and "details" in i.keys() and i["scoreDisplayMode"] != "informative":
+             title = i["title"]
+             desc = i["description"]
+             item = i["details"]["items"][0]
+             typeOfIssue = i["details"]["type"]
+             dicto = {"title": title, "description": desc, "item": item, "type": typeOfIssue}
+             issues.append(dicto)
+             print(title)
+             print(i["details"]["type"])
+             question = f"Title: {title}\nDescription: {desc}\nItem: {item}"
+             #print(answer_question(df, question=question, debug=False))
+             print("***********************************")
+     return issues
+
+
+ def generate_response(website_url, url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed", api_key=st.secrets["page_speed_api_key"]):
+     print("Website: " + website_url)
+     print()
+     name = website_url.split("//")[1].split(".")[1] # Site name; assumes a "www."-prefixed URL
+
+     params = {
+         "url": website_url,
+         "key": api_key,
+         "category": ["performance", "accessibility", "best_practices", "seo"]
+     }
+
+     try:
+         #output_file_path = f"Responses/{name}.json"
+         #if not os.path.exists(output_file_path):
+
+         response = requests.get(url, params=params)
+         response.raise_for_status() # Check for any request errors
+
+         data = response.json()
+         """
+         with open(output_file_path, "w") as output_file:
+             json.dump(data, output_file, indent=4)
+         else:
+             with open(output_file_path) as output_file:
+                 data = json.load(output_file)"""
+
+         # Process the data as needed
+         return data
+
+     except requests.exceptions.RequestException as e:
+         print("Error:", e)
+ #for i in list_of_urls:
+ #    data = generate_response(i)
+ #    process_data(data)
+ #https://chat.openai.com/share/71d7a128-b56d-4368-9eee-beda874e4200
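process_data keeps only audits that actually failed: it drops audits whose scoreDisplayMode is "notApplicable" or "informative", audits with a perfect or null score, and audits without a details block. A toy payload showing the shape that filter expects (the audit id and all values below are invented for illustration; only the field names follow the code above):

    sample = {
        "lighthouseResult": {
            "audits": {
                "uses-text-compression": {  # hypothetical audit id
                    "title": "Enable text compression",
                    "description": "Text-based resources should be served compressed.",
                    "score": 0.5,
                    "scoreDisplayMode": "numeric",
                    "details": {
                        "type": "opportunity",
                        "items": [{"url": "https://example.com/app.js"}],
                    },
                }
            }
        }
    }

    # Same filter as the loop in process_data
    for audit in sample["lighthouseResult"]["audits"].values():
        if (audit["scoreDisplayMode"] not in ("notApplicable", "informative")
                and audit["score"] is not None and audit["score"] != 1
                and "details" in audit):
            print(audit["title"], "->", audit["details"]["items"][0])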
processed/embeddings.csv ADDED
The diff for this file is too large to render.
 
processed/scraped.csv ADDED
The diff for this file is too large to render.