File tree 4 files changed +21
-1
lines changed
4 files changed +21
-1
lines changed Original file line number Diff line number Diff line change 12
12
SPIDER_MODULES = ['quotes.spiders' ]
13
13
NEWSPIDER_MODULE = 'quotes.spiders'
14
14
15
+ # PROXY_POOL_ENABLED = True
15
16
16
17
# Crawl responsibly by identifying yourself (and your website) on the user-agent
17
18
#USER_AGENT = 'quotes (+http://www.yourdomain.com)'
19
+ DOWNLOADER_MIDDLEWARES = {
20
+ #The below two lines are for user agents
21
+ 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware' : None ,
22
+ 'scrapy_user_agents.middlewares.RandomUserAgentMiddleware' : 400 ,
23
+ # Enable the below line to use proxies
24
+ # 'scrapy_proxy_pool.middlewares.ProxyPoolMiddleware': 610,
25
+ # 'scrapy_proxy_pool.middlewares.BanDetectionMiddleware': 620,
26
+ }
18
27
19
28
# Obey robots.txt rules
20
29
ROBOTSTXT_OBEY = True
Original file line number Diff line number Diff line change @@ -10,7 +10,7 @@ class QuotesScraper(scrapy.Spider):
10
10
11
11
def _parse (self , response , ** kwargs ):
12
12
item = QuotesItem ()
13
- for quote in response .css (".quote" ):
13
+ for quote in response .css (".quote" )[: 2 ] :
14
14
title = quote .css (".quoteText::text" ).extract_first ()
15
15
author = quote .css (".authorOrTitle::text" ).extract_first ()
16
16
item ["title" ] = title
Original file line number Diff line number Diff line change
1
+ ## **QuotesScrapy**
2
+ This scraper is based on the Scrapy framework and supports pagination. It rotates fake user agents to bypass basic bot detection.
3
+
4
+ Steps to run the project:
5
+ 1. Activate the virtual environment with `. env/bin/activate`
6
+ 2. Install the requirements with `pip install -r requirements.txt`
7
+ 3. Run the following command:
8
+ <br>`scrapy crawl QuotesScraper`
Original file line number Diff line number Diff line change
1
+ Scrapy == 2.4.0
2
+ scrapy-proxy-pool == 0.1.9
3
+ scrapy-user-agents == 0.1.1
You can’t perform that action at this time.
0 commit comments