grapplerulrich committed on
Commit
37ee6a5
1 Parent(s): 1ec143e

Add support for more content classes

Browse files
Files changed (1) hide show
  1. beautiful_soup/app.py +15 -0
beautiful_soup/app.py CHANGED
@@ -61,6 +61,21 @@ def get_main_content( soup ):
61
  if content is not None:
62
  print('Has .region--content class.')
63
  return content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  return None
66
 
 
61
  if content is not None:
62
  print('Has .region--content class.')
63
  return content
64
+
65
+ content = soup.find( "div", { "class": "article" } )
66
+ if content is not None:
67
+ print('Has .article class.')
68
+ return content
69
+
70
+ content = soup.find( "div", { "class": "article-inner_html" } )
71
+ if content is not None:
72
+ print('Has .article-inner_html class.')
73
+ return content
74
+
75
+ content = soup.find( "article" )
76
+ if content is not None:
77
+ print('Has article tag.')
78
+ return content
79
 
80
  return None
81