Skip to content

Commit 573bb1e

Browse files
authored
Merge pull request larymak#349 from MBSA-INFINITY/instragram-scraper
Instragram Profile Scraper using Flask
2 parents 4b7636f + 0a52c12 commit 573bb1e

File tree

6 files changed

+147
-0
lines changed

6 files changed

+147
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*.pyc
2+
/__pycache__
3+
.env
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<!--Please do not remove this part-->
2+
![Star Badge](https://img.shields.io/static/v1?label=%F0%9F%8C%9F&message=If%20Useful&style=style=flat&color=BC4E99)
3+
![Open Source Love](https://badges.frapsoft.com/os/v1/open-source.svg?v=103)
4+
5+
# Instagram Profile Scraper in Flask
6+
7+
## 🛠️ Description
8+
This project develops an API in **Flask** that uses **Instaloader** and **Instagram's GraphQL APIs** to scrape the number of likes and comments on every post of a public Instagram profile.
9+
10+
## ⚙️ Languages or Frameworks Used
11+
- Flask
12+
- Instaloader, Instagram GraphQL APIs
13+
14+
15+
## 🌟 How to run
16+
- ### Install all the requirements
17+
Run `pip install -r requirements.txt` to install all the requirements.
18+
19+
- ### Now, just run the project
20+
- To run the project, open the `bash` terminal of VSCode or any other code editor and run `./start_server.sh`.
21+
- The server will start running on `http://127.0.0.1:{port_number}` (generally `http://127.0.0.1:5000`).
22+
23+
- ### Explore the API
24+
Open a browser, Postman, or Thunder Client and request the following URL: `http://127.0.0.1:5000/get_profile/{instagram_username}`
25+
> Note: The Instagram Profile must be **public**.
26+
27+
28+
## 📺 Demo
29+
- Main screen of the application.
30+
![image](https://github.com/MBSA-INFINITY/Python-project-Scripts/assets/85332648/0b57f8f5-aa8a-416e-9fad-e86c4e416e33)
31+
- Result for my instagram account (**@mbsaiaditya**).
32+
![image](https://github.com/MBSA-INFINITY/Python-project-Scripts/assets/85332648/2069060a-9fdf-4877-aadc-e708e5505e60)
33+
34+
## 🤖 Author
35+
Github - [MBSA-INFINITY](https://github.com/MBSA-INFINITY)
36+
LinkedIn - [MBSAIADITYA](https://www.linkedin.com/in/mbsaiaditya/)
37+
Portfolio - [MBSA](https://mbsaiaditya.in/)
38+
Instagram - [MBSAIADITYA](https://instagram.com/mbsaiaditya)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from flask import Flask, request, jsonify
2+
from helper import get_all_posts, scrape_user_id
3+
import instaloader
4+
5+
app = Flask(__name__)
6+
insta = instaloader.Instaloader()
7+
8+
@app.route('/', methods=['GET'])
def start():
    """Liveness endpoint: reply with a fixed plain-text banner."""
    banner = "Instragram Scraper Server is Running!!"
    return banner
11+
12+
@app.route('/get_profile/<username>', methods=['GET'])
def get_instagram_profile(username):
    """Return like and comment counts for every post of a profile.

    The profile is looked up through Instaloader, its numeric user id is
    resolved with ``scrape_user_id`` and the per-post data is collected with
    ``get_all_posts``.

    Args:
        username: Instagram handle taken from the URL path.

    Returns:
        A JSON response with the keys "Username", "Number Of Posts" and
        "Posts" on success; otherwise a JSON ``{"error": ...}`` body with
        status 404 (profile not found), 500 (other Instaloader error) or
        400 (any other exception).
    """
    try:
        ig_profile = instaloader.Profile.from_username(insta.context, username)
        # Get the numeric user id the posts query is keyed on
        numeric_id = scrape_user_id(username)
        # Get post data for all posts
        posts = get_all_posts(numeric_id)
        return jsonify({
            "Username": ig_profile.username,
            "Number Of Posts": ig_profile.mediacount,
            "Posts": posts,
        })
    except instaloader.exceptions.ProfileNotExistsException:
        return jsonify({"error": "Profile does not exist"}), 404
    except instaloader.exceptions.InstaloaderException as e:
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
    except Exception as e:
        # Last-resort boundary: anything else is reported to the caller as 400
        return jsonify({"error": f"{str(e)}"}), 400
32+
33+
34+
# Start the Flask server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import json
2+
import httpx
3+
from urllib.parse import quote
4+
from typing import Dict
5+
import jmespath
6+
7+
# Shared HTTP client used by scrape_user_id; every request it sends carries
# the headers below, which mimic a desktop Chrome browser.
client = httpx.Client(
    headers={
        # NOTE(review): presumably the app id sent by Instagram's web client - confirm
        "x-ig-app-id": "936619743392459",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "Accept-Language": "en-US,en;q=0.9,ru;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept": "*/*",
    }
)
16+
17+
def scrape_user_id(username: str):
    """Look up the numeric Instagram user id for a username.

    Args:
        username: Instagram handle. It is percent-encoded before being
            placed in the query string, so characters such as ``&`` or ``#``
            cannot alter the request.

    Returns:
        The value of the "id" field of the returned user record, or None
        when that field is absent.

    Raises:
        httpx.HTTPStatusError: If the endpoint answers with a non-2xx status.
    """
    result = client.get(
        "https://i.instagram.com/api/v1/users/web_profile_info/"
        f"?username={quote(username, safe='')}",
    )
    # Surface the real HTTP failure instead of a confusing JSON/KeyError below
    result.raise_for_status()
    data = json.loads(result.content)
    user_data = data["data"]["user"]
    return user_data.get("id")
26+
27+
def parse_post(data: Dict) -> Dict:
    """Reduce a raw post node to its shortcode, like count and comment count."""
    # JMESPath multiselect hash: output key on the left, source path on the right
    projection = """{
        shortcode: shortcode,
        likes: edge_media_preview_like.count,
        comments: edge_media_to_comment.count
    }"""
    return jmespath.search(projection, data)
34+
35+
def scrape_user_posts(user_id: str, session: httpx.Client, page_size=12):
    """Yield the parsed posts of a user, page by page.

    Requests the user's timeline from Instagram's GraphQL endpoint and
    follows the pagination cursor until there is no next page.

    Args:
        user_id: Instagram user id, sent as the ``id`` query variable.
        session: httpx client used to issue the requests.
        page_size: Number of posts requested per page.

    Yields:
        One dict per post, as produced by ``parse_post``.

    Raises:
        httpx.HTTPStatusError: If a request is answered with a non-2xx status.
    """
    base_url = "https://www.instagram.com/graphql/query/?query_hash=e769aa130647d2354c40ea6a439bfc08&variables="
    variables = {
        "id": user_id,
        "first": page_size,
        "after": None,
    }
    _page_number = 1
    while True:
        resp = session.get(base_url + quote(json.dumps(variables)))
        # Report redirects/errors as such rather than as a JSON decoding failure
        resp.raise_for_status()
        all_posts_data = resp.json()
        posts = all_posts_data["data"]["user"]["edge_owner_to_timeline_media"]
        # Treat a missing or null edge list as an empty page
        for post in posts.get("edges") or []:
            yield parse_post(post.get("node"))
        page_info = posts.get("page_info")
        if _page_number == 1:
            print(f"scraping total {posts['count']} posts of {user_id}")
        else:
            print(f"scraping page {_page_number}")
        if not page_info["has_next_page"]:
            break
        # A cursor that did not advance would request the same page forever
        if variables["after"] == page_info["end_cursor"]:
            break
        variables["after"] = page_info["end_cursor"]
        _page_number += 1
60+
61+
62+
def get_all_posts(user_id):
    """Collect every post of a user into a list.

    Opens a dedicated httpx client with timeouts disabled, drains
    ``scrape_user_posts`` for the given id and closes the client afterwards.

    Args:
        user_id: Instagram user id; converted to ``str`` before querying.

    Returns:
        A list with one parsed post dict per post.
    """
    with httpx.Client(timeout=None) as http_session:
        post_iter = scrape_user_posts(str(user_id), http_session)
        # Materialise inside the ``with`` so the client is still open while
        # the generator issues its requests.
        return list(post_iter)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Flask
2+
instaloader
3+
flask-cors
4+
httpx
5+
jmespath
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python app.py

0 commit comments

Comments
 (0)