-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
30 lines (23 loc) · 1.01 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from bs4 import BeautifulSoup
import requests
def get_soup(searchVal):
    """Fetch the dictionary.com page for ``searchVal`` and return its matching sections.

    Args:
        searchVal: The word or phrase to look up. It is converted to ``str``
            and percent-encoded before being placed in the URL path.

    Returns:
        The result of ``soup.find_all``: every ``<section>`` tag whose class
        attribute is ``css-pnw38j e1hk9ate4``. The result is empty when the
        page contains no such section.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    # Function-scope import so the block does not rely on the file header.
    from urllib.parse import quote

    # Same URL the site serves when the term is typed into its search box.
    # Escaping keeps characters such as '/', '?' or '#' inside the path
    # segment instead of letting them change which URL is requested.
    url = f"https://www.dictionary.com/browse/{quote(str(searchVal), safe='')}"

    # requests applies no timeout by default, so a stalled server would block
    # this call forever; fail after ten seconds instead.
    source = requests.get(url, timeout=10).text

    # Parse the HTML with BeautifulSoup, using lxml as the parser.
    soup = BeautifulSoup(source, 'lxml')

    # Class attribute of the sections we want from the page.
    # NOTE(review): this looks like a build-generated class name and may
    # change when the site is redeployed - confirm before relying on it.
    section = 'css-pnw38j e1hk9ate4'

    return soup.find_all('section', class_=section)
# TODO: handle searches that return no results (the target section will not exist on the page).
# TODO: give the function below a more descriptive name.
def func(section):
    """Print the prettified markup of each ``<div>`` inside the section's first ``<div>``.

    Some words have several meanings under the same heading, so every nested
    ``<div>`` is printed rather than only the first.

    Args:
        section: A parsed tag exposing a ``div`` attribute (presumably a
            BeautifulSoup ``Tag`` - verify against the caller).

    Returns:
        None. Output goes to stdout, one ``prettify()`` result per ``print``.
    """
    # The heading (section.h3.span.text) was read here but never used, and the
    # lookup raised AttributeError on sections without an <h3>/<span>; it is
    # intentionally not performed.
    container = section.div
    if container is None:
        # No <div> in this section, so there are no meanings to print.
        return

    # Meanings live in <div> elements nested under the first <div>.
    for meaning in container.find_all('div'):
        print(meaning.prettify())
# Demo run: look up a sample word and print the meanings found in the second
# matching section (index 1) of the result page.
_sections = get_soup('fuck')
func(_sections[1])