Skip to content

Commit

Permalink
tp ush
Browse files Browse the repository at this point in the history
Merge branch 'master' of github.com:saasify-sh/twitter-flock

* 'master' of github.com:saasify-sh/twitter-flock:
  Add ranking options by followers, age, verification (#5)
  • Loading branch information
transitive-bullshit committed Jul 11, 2020
2 parents cf92e5e + 93aadd4 commit 62ddcf7
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
23 changes: 23 additions & 0 deletions lib/ranking.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
'use strict'

// TODO: Serialization mechanism to pass data objects.
// Options:
// - List of user id's in stdout
// - JSON in stdout
// - JSON in output file
// - SQLite database

const spawn = require('child_process').spawn
const pythonProcess = spawn('python3', ['ranking/rank.py'])

pythonProcess.stdout.on('data', (data) => {
// Do something with the data returned from python script
console.log('received data from python stdout')
console.log(data.toString())
})

pythonProcess.stderr.on('data', (data) => {
// Do something with the data returned from python script
console.log('received data from python stderr')
console.log(data.toString())
})
47 changes: 47 additions & 0 deletions ranking/rank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pandas as pd
import json
import pprint

# These methods are all functional
def filter_verified(df) -> pd.DataFrame:
return df[df['verified'] == True]

def rank_by_col(df, col: str, ascending: bool) -> pd.DataFrame:
return df.sort_values(col, ascending=ascending)

def rank_by_lambda(df, lmbda, ascending: bool) -> pd.DataFrame:
""" Lambda function should have a single DataFrame argument and return a Series.
For example, `lmbda = lambda df: df.followers_count * df.friends_count`.
"""
return df.loc[lmbda(df).sort_values(ascending=ascending).index]

if __name__ == "__main__":
print("Running 'python ranking/test.py'")
pd.set_option('display.max_columns', None)

with open("fixtures/twitter-users-10k.json", "r") as read_file:
users: list = json.load(read_file)

cols = ["name", "screen_name", "description", "url", "followers_count", "friends_count",
"favourites_count", "created_at", "verified", "following", "status", "id", "id_str"]
df = pd.DataFrame(users)[cols]
df["created_at"] = pd.to_datetime(df["created_at"])

print(df)
# Filter by verified
print("Filter verified")
print(filter_verified(df))
# Filter by followers
print("By followers")
print(rank_by_col(df, 'followers_count', ascending=False))
# Filter by friends (someone who follows you and nobody else is likely a superfan)
print("By friends")
print(rank_by_col(df, 'friends_count', ascending=True))
# Filter by favourites activity
print("By favourites")
print(rank_by_col(df, 'favourites_count', ascending=False))
# Filter by follower ratio
rank_by_lambda(df, lambda df: df.followers_count - df.friends_count, ascending=False)
# Filter by account age
print(rank_by_col(df, 'created_at', ascending=True))

0 comments on commit 62ddcf7

Please sign in to comment.