ckfrpark committed on
Commit
2854fa8
·
verified ·
1 Parent(s): f9458f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -1,21 +1,26 @@
1
- import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
 
5
- def scrape_website(url):
6
- # 웹 페이지의 내용을 가져옵니다.
7
- response = requests.get(url)
8
- # BeautifulSoup ๊ฐ์ฒด๋ฅผ ์ƒ์„ฑํ•˜์—ฌ HTML์„ ํŒŒ์‹ฑํ•ฉ๋‹ˆ๋‹ค.
9
- soup = BeautifulSoup(response.text, 'html.parser')
10
- # 웹 페이지의 타이틀을 추출합니다.
11
- title = soup.find('title').text
12
- return title
13
 
14
- with gr.Blocks() as demo:
15
- gr.Markdown("### 웹 스크래핑 프로그램")
16
- url_input = gr.Textbox(label="URL์„ ์ž…๋ ฅํ•˜์„ธ์š”")
17
- output = gr.Textbox(label="웹 페이지 타이틀")
18
- gr.Button("스크랩").click(scrape_website, inputs=url_input, outputs=output)
19
 
20
- if __name__ == "__main__":
21
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 필요한 라이브러리를 임포트합니다.
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ import pandas as pd
5
 
6
+ # ์›น ํŽ˜์ด์ง€์˜ URL์„ ์ง€์ •ํ•ฉ๋‹ˆ๋‹ค.
7
+ url = '์—ฌ๊ธฐ์—_์ถ”์ถœํ•˜๊ณ ์ž_ํ•˜๋Š”_์›นํŽ˜์ด์ง€์˜_URL์„_์ž…๋ ฅํ•˜์„ธ์š”'
 
 
 
 
 
 
8
 
9
+ # requests๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ์›น ํŽ˜์ด์ง€์˜ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ต๋‹ˆ๋‹ค.
10
+ response = requests.get(url)
 
 
 
11
 
12
+ # BeautifulSoup ๊ฐ์ฒด๋ฅผ ์ƒ์„ฑํ•˜์—ฌ HTML์„ ํŒŒ์‹ฑํ•ฉ๋‹ˆ๋‹ค.
13
+ soup = BeautifulSoup(response.text, 'html.parser')
14
+
15
+ # 웹 페이지의 특정 부분을 선택하여 데이터를 추출합니다.
16
+ # 예시: 페이지의 모든 'p' 태그에 있는 텍스트를 추출합니다.
17
+ # 실제 사용 사례에 맞게 선택자를 조정해야 합니다.
18
+ texts = [p.text for p in soup.find_all('p')]
19
+
20
+ # 추출한 데이터를 출력합니다.
21
+ for text in texts:
22
+ print(text)
23
+
24
+ # ์„ ํƒ์ : ์ถ”์ถœํ•œ ๋ฐ์ดํ„ฐ๋ฅผ DataFrame์œผ๋กœ ๋ณ€ํ™˜ํ•˜๊ณ  ์—‘์…€ ํŒŒ์ผ๋กœ ์ €์žฅํ•ฉ๋‹ˆ๋‹ค.
25
+ df = pd.DataFrame(texts, columns=['Text'])
26
+ df.to_excel('extracted_data.xlsx', index=False)