Baskar2005's picture
Update app.py
1a579a9 verified
import streamlit as st
import requests
import html2text
import os
# Streamlit script: fetch a page through the Bright Data request API and show
# its content converted from HTML to plain text.

# The API key is read from the environment rather than typed into the UI.
api_key = os.getenv('APIKEY_BRIGHTDATA')

st.title("ByPass Capcha & Text Extractor")

# Bright Data zone name sent with every request.
zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")

if st.button("Extract Text"):
    # Normalise once so a whitespace-only entry is treated as empty.
    target_url = (url or "").strip()

    if not api_key:
        # Without this check the request would be sent with the literal
        # header "Bearer None" and fail with an opaque authorization error.
        st.error(
            "Bright Data API key is not configured. "
            "Set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept-Language": "en-US,en;q=0.9",
        }
        data = {
            "zone": zone,
            "url": target_url,
            "format": "raw",
        }
        with st.spinner("Fetching page..."):
            try:
                response = requests.post(
                    "https://api.brightdata.com/request",
                    json=data,
                    headers=headers,
                    timeout=60,
                )
                # Turn 4xx/5xx responses into exceptions so they are
                # reported below instead of being rendered as page text.
                response.raise_for_status()
                # Convert HTML to readable text
                text = html2text.html2text(response.text)
            except Exception as e:
                # UI boundary: report any fetch/convert failure to the user
                # rather than letting the script run abort.
                st.error(f"Error: {e}")
            else:
                st.subheader("Extracted Text")
                st.text_area("Result", text, height=400)
                st.download_button(
                    "Download as .txt", text, file_name="extracted.txt"
                )