Neon-AI committed on
Commit
32528da
·
verified ·
1 Parent(s): a3a999c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -10
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import streamlit as st
2
  import subprocess
3
  import json
 
 
4
 
5
  st.title("Advanced Scrapy Scraper")
6
 
@@ -8,14 +10,45 @@ url = st.text_input("Start URL", "https://example.com")
8
  run = st.button("Run Scrape")
9
 
10
  if run and url:
 
 
11
  with st.spinner("Scraping..."):
12
- cmd = ["scrapy", "crawl", "advanced", "-a", f"start_url={url}", "-o", "output.json", "--nolog"]
13
- subprocess.run(cmd, cwd=".", shell=False) # assume spider accepts -a start_url
14
-
15
- try:
16
- with open("output.json") as f:
17
- data = json.load(f)
18
- st.json(data)
19
- st.download_button("Download JSON", json.dumps(data), "results.json")
20
- except:
21
- st.error("No output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import subprocess
3
  import json
4
+ import os
5
+ import uuid
6
 
7
  st.title("Advanced Scrapy Scraper")
8
 
 
10
  run = st.button("Run Scrape")
11
 
12
  if run and url:
13
+ output_file = f"output_{uuid.uuid4().hex}.json" # unique file per run
14
+
15
  with st.spinner("Scraping..."):
16
+ cmd = [
17
+ "scrapy", "crawl", "advanced",
18
+ "-a", f"start_url={url}",
19
+ "-o", output_file,
20
+ "--nolog"
21
+ ]
22
+
23
+ result = subprocess.run(
24
+ cmd,
25
+ cwd=".",
26
+ capture_output=True,
27
+ text=True
28
+ )
29
+
30
+ # Check if Scrapy failed
31
+ if result.returncode != 0:
32
+ st.error("Scrapy failed")
33
+ st.text(result.stderr)
34
+ else:
35
+ if os.path.exists(output_file):
36
+ try:
37
+ with open(output_file, "r") as f:
38
+ data = json.load(f)
39
+
40
+ st.success("Scrape complete")
41
+ st.json(data)
42
+
43
+ st.download_button(
44
+ "Download JSON",
45
+ json.dumps(data, indent=2),
46
+ "results.json",
47
+ "application/json"
48
+ )
49
+
50
+ except json.JSONDecodeError:
51
+ st.error("Output file is not valid JSON")
52
+
53
+ else:
54
+ st.error("No output file generated")