-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindeed.py
131 lines (119 loc) · 5.42 KB
/
indeed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import undetected_chromedriver as uc
from selenium import webdriver
from seleniumbase import Driver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
class indeed():
    """Scrape job listings from fr.indeed.com with a headless Chrome driver.

    NOTE(review): the entry point is named ``init`` (not ``__init__``), so it
    is not run on construction; callers must invoke it explicitly. It returns
    the list of collected jobs.
    """

    SEARCH_ENDPOINT = "https://fr.indeed.com/jobs"

    @staticmethod
    def _fromage_for(date):
        """Map a "posted within N days" value to the ``fromage`` URL value.

        Returns 1, 3, 7 or 14 for the matching bucket, ``'last'`` for 15-30
        days, and ``''`` (no filter) when *date* is empty, not convertible to
        an integer, below 1, or above 30.
        """
        try:
            days = int(date)
        except (TypeError, ValueError, OverflowError):
            return ''
        if days < 1 or days > 30:
            return ''
        for upper_bound, value in ((1, 1), (3, 3), (7, 7), (14, 14)):
            if days <= upper_bound:
                return value
        return 'last'

    @staticmethod
    def _build_search_url(query, location, fromage):
        """Return the search URL with every query parameter percent-encoded.

        Values are expected raw (not already percent-encoded); encoding keeps
        spaces, ``&``, ``+`` or ``#`` in a skill or location from corrupting
        the query string.
        """
        # Imported locally so the block does not depend on a file-level import.
        from urllib.parse import urlencode

        params = urlencode({"q": query, "l": location, "fromage": fromage})
        return f"{indeed.SEARCH_ENDPOINT}?{params}"

    def init(self, location, skill, date, mskill):
        """Run a search and return the collected jobs.

        Args:
            location: Location text placed in the ``l`` query parameter.
            skill: Accepted for interface compatibility; not used here.
            date: Maximum posting age in days (int or numeric string); ``''``
                or an unparsable value disables the date filter.
            mskill: Search text placed in the ``q`` query parameter; also
                stored on every job record under ``"skill"``.

        Returns:
            list[dict]: One dict per listing with the keys ``title``,
            ``skill``, ``site_name``, ``job_link``, ``company``, ``location``
            and ``post_date``.
        """
        self.count = 1
        self.mskill = mskill
        self.job = []
        self.active_flag = True
        search_url = self._build_search_url(
            mskill, location, self._fromage_for(date))

        op = uc.ChromeOptions()
        custom_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        # The Chromium switch is "--disable-blink-features" (plural); the
        # singular spelling is not a recognised switch.
        op.add_argument("--disable-blink-features=AutomationControlled")
        op.add_experimental_option("excludeSwitches", ["enable-automation"])
        op.add_experimental_option('useAutomationExtension', False)
        op.add_argument(f'--user-agent={custom_user_agent}')
        # NOTE(review): value kept exactly as before; confirm whether
        # "--headless=new" is what is actually intended.
        op.add_argument('--headless=True')
        self.driver = webdriver.Chrome(options=op)
        try:
            self.driver.execute_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            self.driver.get(search_url)
            time.sleep(1)
            self._dismiss_challenge()
            # Reload the results page after the challenge attempt.
            self.driver.get(search_url)
            time.sleep(2)
            # Best-effort dismissal of overlays that may cover the results.
            self._click_if_present("/html/body/div[3]/div/div/div[1]/button", 5)
            self._click_if_present("//button[@id='onetrust-accept-btn-handler']", 5)
            while self.active_flag:
                try:
                    self.job_fetch()
                except Exception:
                    # A page that cannot be read ends the crawl; retrying
                    # with the flag still set would loop forever.
                    self.active_flag = False
                else:
                    if self.active_flag:
                        time.sleep(2)
        finally:
            # Always release the browser, including when setup fails.
            self.driver.quit()
        return self.job

    def _dismiss_challenge(self):
        """Best-effort: tick a checkbox inside the first iframe, if any."""
        try:
            iframe = WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "iframe")))
            self.driver.switch_to.frame(iframe)
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//input[@type='checkbox']"))).click()
            self.driver.switch_to.window(self.driver.window_handles[0])
        except Exception:
            # No iframe/checkbox (or the click failed): carry on regardless.
            pass

    def _click_if_present(self, xpath, timeout):
        """Click the element at *xpath* if it appears within *timeout* s."""
        try:
            WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located((By.XPATH, xpath))).click()
        except Exception:
            pass

    def _close_job_alert_popup(self):
        """Best-effort: close the job-alert popup when its heading exists."""
        try:
            # find_element raises when the heading is absent.
            self.driver.find_element(
                By.XPATH, "//h3[@class='DesktopJobAlertPopup-heading']")
            WebDriverWait(self.driver, 4).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[@aria-label='fermer']"))).click()
        except Exception:
            pass

    def _parse_listing(self, item):
        """Build a job dict from one ``<li>``, or return None if not a job.

        Raises whatever the element lookups raise when an expected child
        (anchor, company, location, date) is missing.
        """
        anchor = item.find_element(By.TAG_NAME, 'a')
        anchor_id = anchor.get_attribute("id")
        if not anchor_id:
            # An anchor without an id is not a listing; use the occasion to
            # close the job-alert popup if it is showing.
            self._close_job_alert_popup()
            return None
        if not anchor_id.startswith("job"):
            return None

        indeedjob = {}
        indeedjob["title"] = anchor.text
        indeedjob["skill"] = self.mskill
        indeedjob["site_name"] = "Indeed"
        indeedjob["job_link"] = anchor.get_attribute("href")
        indeedjob["company"] = item.find_element(
            By.XPATH, ".//span[@class='companyName']").text
        indeedjob["location"] = item.find_element(
            By.XPATH, ".//div[@class='companyLocation']").text
        tmp_date = item.find_element(
            By.XPATH,
            ".//div[@class='heading6 tapItem-gutter result-footer']/span[@class='date']").text
        # Drops the first 6 characters of the date text; presumably a fixed
        # label prefix -- TODO confirm against the live markup.
        indeedjob["post_date"] = tmp_date[6:]
        print("title------", indeedjob["title"])
        print("job_link------", indeedjob["job_link"])
        print("company------", indeedjob["company"])
        print("location------", indeedjob["location"])
        print("post_date------", indeedjob["post_date"])
        return indeedjob

    def job_fetch(self):
        """Collect the listings on the current page, then open the next page.

        Appends each parsed listing to ``self.job``. Increments ``self.count``
        and navigates to the pagination link for that page number; when the
        link cannot be found or followed, sets ``self.active_flag`` to False.

        Raises:
            Whatever ``WebDriverWait.until`` raises when the results pane or
            its ``<li>`` items do not appear in time.
        """
        left = WebDriverWait(self.driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".jobsearch-LeftPane")))
        items = WebDriverWait(left, 10).until(
            EC.presence_of_all_elements_located((By.TAG_NAME, 'li')))
        for item in items:
            try:
                record = self._parse_listing(item)
            except Exception:
                # Items missing any expected field are skipped.
                continue
            if record is not None:
                self.job.append(record)

        self.count += 1
        print(self.count)
        time.sleep(.6)
        try:
            next_link = self.driver.find_element(
                By.XPATH, f"//a[@data-testid='pagination-page-{self.count}']")
            next_url = next_link.get_attribute('href')
            self.driver.get(next_url)
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        except Exception:
            # No further page: tell the caller's loop to stop.
            self.active_flag = False