delphine18 committed on
Commit
695b43d
·
verified ·
1 Parent(s): 6f86415

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. app.py +14 -0
  3. requirements.txt +13 -0
  4. webscraping.py +128 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["flask", "run","--host=0.0.0.0", "--port=7860"]
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ from webscraping import scrape
3
+ app = Flask(__name__)
4
+ app.config['JSON_SORT_KEYS'] = False
5
+
6
+ @app.route('/',methods=['POST','GET'])
7
+ def give():
8
+ filters = request.get_json()
9
+ commodity = filters['commodity']
10
+ data1,data2,l = scrape(commodity)
11
+ return jsonify({"text":data1, "image":data2, "indices":l})
12
+
13
+ if __name__=="__main__":
14
+ app.run()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ beautifulsoup4==4.10.0
2
+ Flask==3.0.2
3
+ requests==2.25.1
4
+ Jinja2==3.1.3
5
+ numpy==1.26.4
6
+ Werkzeug==3.0.1
7
+ itsdangerous==2.1.2
8
+ blinker==1.7.0
9
+ click==8.1.7
10
+ urllib3==1.26.5
11
+ matplotlib==3.5.1
12
+
13
+
webscraping.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #%%
2
+ from bs4 import BeautifulSoup
3
+ import requests
4
+ import matplotlib.pyplot as plt
5
+ import io
6
+ import base64
7
+ import re
8
+
9
+ # get URL
10
+
11
+ def scrape(commodity="Apple"):
12
+ #list
13
+ list1 = ['rice','maize','jute','cotton','coconut','papaya','orange_(fruit)','apple','cantaloupe','watermelon',
14
+ 'grapes','mango','banana','pomegranate','lentil','blackgram','mungbean','mothbean','pigeonpea',
15
+ 'chickpea','coffee']
16
+ link = "https://en.wikipedia.org/wiki/"
17
+ page = requests.get(link+(commodity.capitalize()))
18
+
19
+ # scrape webpage
20
+ soup = BeautifulSoup(page.content, 'lxml')
21
+ tags = soup.find_all('h2')
22
+ headings = ['description', 'agronomy', 'pests', 'structure and physiology', 'pests and diseases',
23
+ 'cultivation', 'uses','use', 'environmental impact', 'types','varieties','breeding']
24
+
25
+ # for head in tags:
26
+ # print(head.text)
27
+ infopage = ""
28
+ textjson = {}
29
+ l=[]
30
+ i,j,k=1,1,1
31
+ for head in tags:
32
+ if any(sub in head.text.lower() for sub in headings):
33
+ #tempjson = {}
34
+ #print(head.text)
35
+ #infopage+=head.text
36
+ head_text = head.text
37
+ head_text = re.sub(r'\[\d+\]', '', head_text)
38
+ textjson[f"heading{i}"] = head_text
39
+ l.append(f"heading{i}")
40
+ i+=1
41
+ content = head.find_next_sibling()
42
+ #tempjson['heading'] = head_text
43
+
44
+ while content and content.name!='h2':
45
+ if content.name =='h3' :
46
+ subheading = content.text
47
+ #print(content.text)
48
+ subheading = re.sub(r'\[\d+\]', '', subheading)
49
+ #print(subheading)
50
+ textjson[f"subheading{j}"] = subheading
51
+ l.append(f"subheading{j}")
52
+ j+=1
53
+ #infopage+=content.text
54
+ if content.name == 'p':
55
+ para =content.text
56
+ para = re.sub(r'\[\d+\]', '', para)
57
+ textjson[f"para{k}"] = para
58
+ l.append(f"para{k}")
59
+ k+=1
60
+
61
+ content = content.find_next_sibling()
62
+ # textjson[f'heading{i}']= head_text
63
+ # textjson[f'content{i}'] = tempjson
64
+ #text = re.sub(r'\[\d+\]', '', infopage)
65
+
66
+
67
+ #for commodity prices
68
+ link2 = "https://api.data.gov.in/resource/9ef84268-d588-465a-a308-a864a43d0070?api-key=579b464db66ec23bdd000001cdd3946e44ce4aad7209ff7b23ac571b&format=json&filters%5Bcommodity%5D="
69
+ #commodity = "Apple"
70
+ info = requests.get(link2+(commodity.capitalize()))
71
+ # print(link2+(commodity.capitalize()))
72
+ # print(info)
73
+
74
+ if info.status_code == 200:
75
+ data = info.json()
76
+ data = data['records']
77
+ if data == []:
78
+ print("no data available")
79
+ else:
80
+ print("Failed to retrieve data. Status code:", info.status_code)
81
+
82
+ # Extracting data for plotting
83
+ markets = [entry['market'] for entry in data]
84
+ min_prices = [float(entry['min_price']) for entry in data]
85
+ max_prices = [float(entry['max_price']) for entry in data]
86
+ modal_prices = [float(entry['modal_price']) for entry in data]
87
+
88
+ #modifications
89
+ # for i in range(len(markets)):
90
+ # plt.text(modal_prices[i], i, f"{modal_prices[i]}", ha='left', va='center', fontsize=8, color='black')
91
+ # plt.text(max_prices[i], i, f"{max_prices[i]}", ha='left', va='center', fontsize=8, color='black')
92
+ # plt.text(min_prices[i], i, f"{min_prices[i]}", ha='left', va='center', fontsize=8, color='black')
93
+
94
+ # for index, value in enumerate(modal_prices):
95
+ # plt.text(value, index, str(value), va='center', fontsize=10, color='black')
96
+
97
+
98
+ # Plotting
99
+ plt.figure(figsize=(10, 6))
100
+ plt.barh(markets, min_prices, color='lightblue', label='Min Price')
101
+ plt.barh(markets, max_prices, color='skyblue', label='Max Price', left=min_prices)
102
+ plt.barh(markets, modal_prices, color='dodgerblue', label='Modal Price', left=[min + max for min, max in zip(min_prices, max_prices)])
103
+ plt.xlabel('Price')
104
+ plt.ylabel('Market')
105
+ plt.title(f'{commodity} Prices in Different Markets')
106
+ plt.legend()
107
+ plt.tight_layout()
108
+ #plt.show()
109
+
110
+ # Encode the plot image as a bytes object
111
+ buffer = io.BytesIO()
112
+ plt.savefig(buffer, format='png')
113
+ buffer.seek(0)
114
+ plot_image = buffer.getvalue()
115
+ buffer.close()
116
+
117
+ # Encode the plot image as base64
118
+ plot_image_base64 = base64.b64encode(plot_image).decode('utf-8')
119
+
120
+ response_data = plot_image_base64
121
+ # {
122
+ # 'plot_image':
123
+ # }
124
+ return textjson,response_data,l
125
+
126
+ # display scraped data
127
+ #print(soup.prettify())
128
+ # %%