"""Streamlit page that fetches a URL through the Bright Data request API.

The HTML returned by the API is converted to plain text with ``html2text``,
shown in a text area, and offered as a ``.txt`` download.

The API key is read from the ``APIKEY_BRIGHTDATA`` environment variable.
"""

import os

import html2text
import requests
import streamlit as st

# Read once at start-up; ``None`` when the variable is not set.
api_key = os.getenv('APIKEY_BRIGHTDATA')

st.title("ByPass Capcha & Text Extractor")

# Value sent as the "zone" field of the API request body.
zone = "web_unlocker1"

url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")

if st.button("Extract Text"):
    # Surrounding whitespace (e.g. from a copy/paste) is not part of the URL,
    # and a whitespace-only value must count as "no URL entered".
    target_url = url.strip()

    if not api_key:
        # Without this check the request would be sent with the literal
        # header "Bearer None" and fail with an opaque HTTP error.
        st.warning(
            "API key not found. Set the APIKEY_BRIGHTDATA environment "
            "variable and restart the app."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept-Language": "en-US,en;q=0.9",
        }
        data = {
            "zone": zone,
            "url": target_url,
            # NOTE(review): presumably makes the API return the page body
            # as-is; the code below treats response.text as HTML.
            "format": "raw",
        }
        with st.spinner("Fetching page..."):
            # Broad catch is intentional: this is the UI's top-level boundary,
            # so network, HTTP-status and conversion failures are all shown
            # to the user as a message.
            try:
                response = requests.post(
                    "https://api.brightdata.com/request",
                    json=data,
                    headers=headers,
                    timeout=60,
                )
                # Turn 4xx/5xx responses into exceptions handled below.
                response.raise_for_status()
                html = response.text

                text = html2text.html2text(html)
                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button("Download as .txt", text, file_name="extracted.txt")
            except Exception as e:
                st.error(f"Error: {e}")