Merge pull request larymak#349 from MBSA-INFINITY/instragram-scraper

larymak · web-flow · commit 573bb1eeacf4 · 2023-10-20T08:47:01.000+03:00
Instragram Profile Scraper using Flask
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/.gitignore b/FLASK PROJECTS/Instagram Scraper using Flask/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+/__pycache__
+.env
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/README.md b/FLASK PROJECTS/Instagram Scraper using Flask/README.md
@@ -0,0 +1,38 @@
+<!--Please do not remove this part-->
+![Star Badge](https://img.shields.io/static/v1?label=%F0%9F%8C%9F&message=If%20Useful&style=style=flat&color=BC4E99)
+![Open Source Love](https://badges.frapsoft.com/os/v1/open-source.svg?v=103)
+
+# Instagram Profile Scraper in Flask
+
+## 🛠️ Description
+This project is about developing an API in **Flask** using **Instaloader** and **Instagram's GraphQL APIs** to scrape the number of likes and comments on every post of a public Instagram profile.
+
+## ⚙️ Languages or Frameworks Used
+ - Flask
+ - Instaloader, Instagram GraphQL APIs
+
+
+## 🌟 How to run
+ - ### Install all the requirements
+    Run `pip install -r requirements.txt` to install all the requirements.
+
+-  ### Now, just run the project
+    - To run the project, go to the `bash` terminal of VSCode or any other code editor and run `./start_server.sh`.
+    - The server will start running on `http://127.0.0.1:{port_number}` (generally http://127.0.0.1:5000).
+    
+ - ### Explore the API
+    Open a browser, Postman, or Thunder Client and request the following URL: http://127.0.0.1:5000/get_profile/{instagram_username}
+ > Note: The Instagram Profile must be **public**.
+
+
+## 📺 Demo
+- Main screen of the application.
+![image](https://github.com/MBSA-INFINITY/Python-project-Scripts/assets/85332648/0b57f8f5-aa8a-416e-9fad-e86c4e416e33)
+- Result for my Instagram account (**@mbsaiaditya**).
+![image](https://github.com/MBSA-INFINITY/Python-project-Scripts/assets/85332648/2069060a-9fdf-4877-aadc-e708e5505e60)
+
+## 🤖 Author
+Github - [MBSA-INFINITY](https://github.com/MBSA-INFINITY)
+LinkedIn - [MBSAIADITYA](https://www.linkedin.com/in/mbsaiaditya/)
+Portfolio - [MBSA](https://mbsaiaditya.in/)
+Instagram - [MBSAIADITYA](https://instagram.com/mbsaiaditya)
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/app.py b/FLASK PROJECTS/Instagram Scraper using Flask/app.py
@@ -0,0 +1,35 @@
+from flask import Flask, request, jsonify
+from helper import get_all_posts, scrape_user_id
+import instaloader
+
+# Flask application object; the route decorators below register views on it.
+app = Flask(__name__)
+# Shared Instaloader instance; its `context` is handed to
+# instaloader.Profile.from_username in get_instagram_profile.
+insta = instaloader.Instaloader()
+
+@app.route('/', methods=['GET'])
+def start():
+    return "Instragram Scraper Server is Running!!"
+
+@app.route('/get_profile/<username>', methods=['GET'])
+def get_instagram_profile(username):
+    try:
+        profile = instaloader.Profile.from_username(insta.context, username)
+        #Get 
+        user_id = scrape_user_id(username)
+        # Get post data for all posts
+        post_data = get_all_posts(user_id)
+        response = {
+            "Username": profile.username,
+            "Number Of Posts": profile.mediacount,
+            "Posts": post_data
+        }
+        return jsonify(response)
+    except instaloader.exceptions.ProfileNotExistsException:
+        return jsonify({"error": "Profile does not exist"}), 404
+    except instaloader.exceptions.InstaloaderException as e:
+        return jsonify({"error": f"An error occurred: {str(e)}"}), 500
+    except Exception as e:
+        return jsonify({"error": f"{str(e)}"}), 400
+
+
+# Start Flask's built-in server only when this file is executed directly
+# (start_server.sh does so via `python app.py`), not when it is imported.
+if __name__ == '__main__':
+    app.run()
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/helper.py b/FLASK PROJECTS/Instagram Scraper using Flask/helper.py
@@ -0,0 +1,65 @@
+import json
+import httpx
+from urllib.parse import quote
+from typing import Dict
+import jmespath
+
+# Module-level HTTP client used by scrape_user_id; every request made through
+# it carries the headers below.
+client = httpx.Client(
+    headers={
+        # NOTE(review): presumably the app id sent by Instagram's own web
+        # client — confirm before changing.
+        "x-ig-app-id": "936619743392459",
+        # Desktop Chrome user agent string.
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
+        "Accept-Language": "en-US,en;q=0.9,ru;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept": "*/*",
+    }
+)
+
+def scrape_user_id(username: str):
+    """Scrape Instagram user's data"""
+    result = client.get(
+        f"https://i.instagram.com/api/v1/users/web_profile_info/?username={username}",
+    )
+    data = json.loads(result.content)
+    user_data = data["data"]["user"]
+    user_id = user_data.get("id")
+    return user_id
+
+def parse_post(data: Dict) -> Dict:
+    result = jmespath.search("""{
+        shortcode: shortcode,
+        likes: edge_media_preview_like.count,
+        comments: edge_media_to_comment.count
+    }""", data)
+    return result
+
+def scrape_user_posts(user_id: str, session: httpx.Client, page_size=12):
+    base_url = "https://www.instagram.com/graphql/query/?query_hash=e769aa130647d2354c40ea6a439bfc08&variables="
+    variables = {
+        "id": user_id,
+        "first": page_size,
+        "after": None,
+    }
+    _page_number = 1
+    while True:
+        resp = session.get(base_url + quote(json.dumps(variables)))
+        all_posts_data = resp.json()
+        posts = all_posts_data["data"]["user"]["edge_owner_to_timeline_media"]
+        for post in posts.get("edges"):
+            yield parse_post(post.get("node"))  # note: we're using parse_post function from previous chapter
+        page_info = posts.get("page_info")
+        if _page_number == 1:
+            print(f"scraping total {posts['count']} posts of {user_id}")
+        else:
+            print(f"scraping page {_page_number}")
+        if not page_info["has_next_page"]:
+            break
+        if variables["after"] == page_info["end_cursor"]:
+            break
+        variables["after"] = page_info["end_cursor"]
+        _page_number += 1
+
+
+def get_all_posts(user_id):
+    with httpx.Client(timeout=None) as session:
+        posts = list(scrape_user_posts(str(user_id), session))
+        return posts
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/requirements.txt b/FLASK PROJECTS/Instagram Scraper using Flask/requirements.txt
@@ -0,0 +1,5 @@
+Flask
+instaloader
+flask-cors
+httpx 
+jmespath
diff --git a/FLASK PROJECTS/Instagram Scraper using Flask/start_server.sh b/FLASK PROJECTS/Instagram Scraper using Flask/start_server.sh
@@ -0,0 +1 @@
+#!/usr/bin/env bash
+# Start the Flask server defined in app.py.
+# The shebang lets the script be executed directly as ./start_server.sh.
+python app.py