Speed up the refresh of a Twitter page #2451
-
I created a multiprocess bot (using BaseCase and pytest-xdist) that retrieves new Tweets from a set of Twitter accounts (it could be 2 or any other number). I use the old-twitter-layout-2023 extension with the Chrome browser. I measured the loading time of the Twitter page and saw that it takes about 2.5 seconds.

from seleniumbase import SB
def get_account_twitter():
    """Yield one parameter dict per Twitter account to scrape.

    The generator is consumed by ``pytest.mark.parametrize``, so every
    yielded dict becomes the ``account`` argument of one test case.

    The original post elides both how the account list is obtained and how
    each dict is filled in; those spots are marked below.

    Yields:
        dict: the parameters for a single account (empty in this skeleton).
    """
    # ...
    # TODO: list of accounts to scrape (left blank in the original post).
    accounts = []
    for index, account in enumerate(accounts):
        d = {}
        # ... (populate ``d`` from ``index`` / ``account``; elided in the post)
        yield d
@pytest.mark.parametrize("account", get_account_twitter())
def test_multi_threaded(sb, account, tmp_path_factory):
    """Log in to Twitter, open one profile, and poll it for the top tweet.

    One test case is generated per dict yielded by ``get_account_twitter()``.
    After logging in, the test opens the profile's ``with_replies`` page and
    then loops forever: refresh the page, read the text of the first tweet in
    the timeline and the ``title`` attribute of its link.

    Args:
        sb: the SeleniumBase fixture used to drive the browser.
        account: the parameter dict for the account handled by this case.
        tmp_path_factory: pytest fixture; not used in the visible code.
    """
    # get parameters from 'account' variable
    # NOTE(review): ``d`` and ``account_to_scrape`` are not defined anywhere in
    # the posted snippet -- presumably they are derived from ``account`` in the
    # elided step above; confirm before running.
    sb.open("https://www.twitter.com")
    login_to_Twitter(sb, d)  # it works
    profile_url = f'https://twitter.com/{account_to_scrape}/with_replies'
    sb.open(profile_url)
    discard_tweet = True
    x_rate_limit_remaining = None
    tweet = "Tweet text"
    while True:
        sb.refresh()
        try:
            # it takes about 2.5 seconds!! IS IT POSSIBLE TO SPEED UP????????????
            tweet = sb.get_text('//*[@id="timeline"]/div[1]/article/div[1]/span', timeout=4)
            attribute = sb.get_attribute('//*[@id="timeline"]/div[1]/article/a', "title")
            # I use the attribute for some minor things
        except Exception as err:
            # ... (error handling elided in the original post)
            ...
        # ... (rest of the polling loop elided in the original post)
def start_twitter(paramsTwitter):
    """Start the pytest run for this file through ``BaseCase.main``.

    Args:
        paramsTwitter: mapping whose ``'number_processes'`` entry is passed
            through as the first extra command-line argument (presumably the
            pytest-xdist ``-n`` option -- confirm against the caller).
    """
    launch = BaseCase.main
    # Extra command-line arguments, in the exact order they are passed on.
    cli_args = [
        paramsTwitter['number_processes'],
        "--wire",
        "--extension-dir=temp/old-twitter-layout-2023/",
        "--headless2",
        "--pls=none",
        "--sjw",
        "--block-images",
        "--disable-warnings",
        "-p no:warnings",
        "--no-header",
        "--no-summary",
        "--quiet",
        "--maxfail=1",
    ]
    launch(__name__, __file__, *cli_args)
async def run_blocking_tasks(executor, xxx, paramsTwitter):
    """Schedule the blocking jobs on ``executor`` and wait for all of them.

    Args:
        executor: executor on which the blocking callables are run.
        xxx: placeholder parameter from the original post; unused in the
            visible code.
        paramsTwitter: parameters forwarded to ``start_twitter``.
    """
    # Inside a coroutine the running loop is the one to schedule work on.
    event_loop = asyncio.get_running_loop()
    # here there are other 3 tasks that need to be run as well as twitter
    blocking_tasks = []
    if condition:
        blocking_tasks.append(event_loop.run_in_executor(executor, start_twitter, paramsTwitter))
    if condition:
        # Placeholder in the original post: blocking_tasks.append(<other task>)
        ...
    # asyncio.wait() raises ValueError on an empty collection, so only wait
    # when at least one task was actually scheduled.
    if blocking_tasks:
        await asyncio.wait(blocking_tasks)
def main():
    """Create an event loop and run ``run_blocking_tasks`` to completion.

    The thread pool is used as a context manager so its worker threads are
    shut down when the run finishes or fails; the event loop is always closed.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # paramsTwitter: some useful params on Twitter
    # NOTE(review): ``xxx`` and ``paramsTwitter`` are not defined in the posted
    # snippet -- presumably they are built here in the real program.
    try:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            loop.run_until_complete(
                run_blocking_tasks(executor, xxx, paramsTwitter)
            )
    finally:
        loop.close()
if __name__ == '__main__':
    main()

Thanks in advance :)
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
There are several options for speeding up page loads. E.g., see these lines of SeleniumBase/README.md: Line 688 in ca8451d, Line 686 in ca8451d, Line 687 in ca8451d, Line 668 in ca8451d, Line 669 in ca8451d.
The old Ad-blocking can also be done via CDP. Example: SeleniumBase/examples/test_cdp_ad_blocking.py
Using SeleniumBase already includes a multi-threading library via [text truncated in the source]
Beta Was this translation helpful? Give feedback.
There are several options for speeding up page loads. Eg:
SeleniumBase/README.md
Line 688 in ca8451d
SeleniumBase/README.md
Line 686 in ca8451d
SeleniumBase/README.md
Line 687 in ca8451d
SeleniumBase/README.md
Line 668 in ca8451d
SeleniumBase/README.md
Line 669 in ca8451d