Skip to content

Commit a46de20

Browse files
modified env.yaml, cleaned up structure, made basic API
1 parent c14a1c4 commit a46de20

27 files changed

+651363
-2
lines changed
File renamed without changes.

backend/Data/malicious_phish.csv

+651,199
Large diffs are not rendered by default.
File renamed without changes.

backend/app/api/models/fraud.py

Whitespace-only changes.

backend/app/api/models/phish_model.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import pickle
2+
3+
4+
class RFModel:
    """Classifier wrapper around a pickled model and its pickled vectorizer.

    Both artifacts are unpickled when an instance is constructed, so creating
    an ``RFModel`` performs file I/O.
    """

    def __init__(self):
        # NOTE(review): these relative paths are resolved against the process
        # working directory, not this module's directory. The repository also
        # stores the model next to this file as ``rf_model.pkl`` -- confirm
        # that ``phish_model.pkl`` is the intended file name.
        self.model = self.load_model("phish_model.pkl")
        self.vectorizer = self.load_model("vectorizer.pkl")

    def load_model(self, model_path):
        """Unpickle and return the object stored at ``model_path``.

        SECURITY: ``pickle.load`` can execute arbitrary code while loading.
        Only point this at artifacts that come from a trusted source.

        Raises:
            OSError: if the file cannot be opened.
            pickle.UnpicklingError: if the file is not a valid pickle.
        """
        with open(model_path, "rb") as f:
            return pickle.load(f)

    def predict(self, input_data):
        """Return the model's predictions for ``input_data``.

        Args:
            input_data: iterable of raw inputs accepted by the vectorizer's
                ``transform`` method (the API router passes a list of URL
                strings).

        Returns:
            Whatever ``self.model.predict`` returns for the vectorized input.
        """
        # The model must be fed the vectorizer's output, not the raw input.
        input_vectorized = self.vectorizer.transform(input_data)
        return self.model.predict(input_vectorized)

backend/app/api/models/reports.py

Whitespace-only changes.

backend/app/api/models/rf_model.pkl

78.9 MB
Binary file not shown.

backend/app/api/models/vectorizer.pkl

2.54 MB
Binary file not shown.
File renamed without changes.

backend/app/api/routers/auth.py

Whitespace-only changes.

backend/app/api/routers/fraud.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from fastapi import APIRouter
2+
from app.utils.get_redirected_urls import get_redirected_urls
3+
4+
router = APIRouter()


@router.get("/get_redirected_urls/{url:path}")
async def get_redirected_urls_from_url(url: str):
    """Return the redirecting links found on the page at ``url``.

    The ``:path`` converter is required because a URL contains ``/``
    characters, which a plain ``{url}`` path parameter never matches.
    Callers should percent-encode the target URL (in particular any ``?`` or
    ``#``) so that its query string is not parsed as this request's own.

    Returns:
        The result of ``get_redirected_urls(url)``.
    """
    import asyncio

    # get_redirected_urls performs blocking HTTP requests; run it in a worker
    # thread so the event loop can keep serving other requests meanwhile.
    return await asyncio.to_thread(get_redirected_urls, url)
9+
+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from fastapi import APIRouter, Depends
2+
from app.api.models.phish_model import RFModel
3+
4+
router = APIRouter()

# Shared RFModel instance, created lazily on the first prediction request.
_model = None


def _get_model():
    """Return the shared RFModel, loading it from disk on first use."""
    global _model
    if _model is None:
        _model = RFModel()
    return _model


@router.get("/phish_model")
async def predict_phish_model(url: str):
    """Return the model's prediction for a single URL.

    Args:
        url: the URL to classify, supplied as a query parameter.

    Returns:
        The first (and only) element of the model's prediction for ``[url]``.
    """
    # Constructing RFModel unpickles the model and vectorizer from disk, so
    # reuse one instance instead of rebuilding it on every request.
    return _get_model().predict([url])[0]

backend/app/api/routers/reports.py

Whitespace-only changes.

backend/app/database/connection.py

Whitespace-only changes.

backend/app/database/models.py

Whitespace-only changes.

backend/app/main.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
11
from fastapi import FastAPI
2+
from fastapi.middleware.cors import CORSMiddleware
3+
from fastapi.responses import JSONResponse
4+
from app.api.routers.get_redirected_urls import router as redirected_urls_router
5+
from app.api.routers.model_serve import router as model_serve_router
26

37
app = FastAPI()

# CORS: every origin, method and header is allowed. Credentials are disabled
# because a wildcard origin combined with allow_credentials=True would let any
# website send credentialed (cookie-bearing) requests to this API.
# NOTE(review): if the frontend needs cookies or Authorization headers, list
# its origin(s) explicitly in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount the feature routers on the application.
app.include_router(redirected_urls_router)
app.include_router(model_serve_router)
19+
520

6-
# Sample endpoint
721
@app.get("/")
async def read_root():
    """Serve the static welcome payload at the API root."""
    welcome = {"message": "Welcome to the Fraud Detection API"}
    return welcome

backend/app/security/auth_utils.py

Whitespace-only changes.

backend/app/security/security_utils.py

Whitespace-only changes.

backend/app/services/auth_service.py

Whitespace-only changes.

backend/app/services/fraud_service.py

Whitespace-only changes.

backend/app/services/report_service.py

Whitespace-only changes.

backend/app/utils/common_utils.py

Whitespace-only changes.
+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import requests
2+
from bs4 import BeautifulSoup
3+
import urllib.parse
4+
5+
6+
def get_redirected_urls(url, timeout=10):
    """Find links on a page whose final destination differs from their href.

    The page at ``url`` is fetched without following redirects. Every
    ``<a href=...>`` on it is resolved against ``url`` and then requested with
    HEAD while following redirects; a link is reported when the final URL
    differs from the resolved one.

    Args:
        url: address of the page to scan.
        timeout: seconds to wait on each HTTP request before giving up.

    Returns:
        A list of ``(full_url, redirected_url)`` tuples. The list is empty
        when the page does not answer with status 200 or when fetching or
        parsing it fails; this function does not raise.
    """
    # NOTE(review): ``url`` is caller-supplied and fetched server-side, so this
    # can be used to reach internal hosts (SSRF). Consider validating or
    # allow-listing targets before exposing it publicly.
    try:
        response = requests.get(url, allow_redirects=False, timeout=timeout)
        if response.status_code != 200:
            print("Failed to fetch URL:", url)
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        redirected_urls = []
        for link in soup.find_all("a", href=True):
            try:
                full_url = urllib.parse.urljoin(url, link["href"])
                # mailto:, javascript:, tel: and similar links cannot be
                # requested over HTTP, so skip them.
                scheme = urllib.parse.urlsplit(full_url).scheme
                if scheme not in ("http", "https"):
                    continue
                redirected_url = requests.head(
                    full_url, allow_redirects=True, timeout=timeout
                ).url
            except (requests.RequestException, ValueError) as e:
                # One broken or unreachable link must not discard the results
                # already collected for the rest of the page.
                print("An error occurred:", str(e))
                continue
            if redirected_url != full_url:
                redirected_urls.append((full_url, redirected_url))
        return redirected_urls
    except Exception as e:
        # Error boundary for the API endpoint: report the failure and return
        # an empty result instead of propagating it.
        print("An error occurred:", str(e))
        return []

backend/fraud_detection_model.pkl

3.35 MB
Binary file not shown.

environment.yaml

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
name: fds
2+
channels:
3+
- conda-forge
4+
- defaults
5+
dependencies:
6+
- _libgcc_mutex=0.1=main
7+
- _openmp_mutex=5.1=1_gnu
8+
- _py-xgboost-mutex=2.0=cpu_0
9+
- annotated-types=0.6.0=py311h06a4308_0
10+
- anyio=4.3.0=pyhd8ed1ab_0
11+
- argon2-cffi=21.3.0=pyhd3eb1b0_0
12+
- argon2-cffi-bindings=21.2.0=py311h5eee18b_0
13+
- bcrypt=3.2.0=py311h5eee18b_1
14+
- beautifulsoup4=4.12.2=py311h06a4308_0
15+
- blas=1.0=mkl
16+
- bottleneck=1.3.7=py311hf4808d0_0
17+
- brotli-python=1.0.9=py311h6a678d5_7
18+
- bs4=4.12.2=py38hd3eb1b0_0
19+
- bzip2=1.0.8=h5eee18b_5
20+
- ca-certificates=2024.3.11=h06a4308_0
21+
- certifi=2024.2.2=py311h06a4308_0
22+
- cffi=1.16.0=py311h5eee18b_0
23+
- charset-normalizer=2.0.4=pyhd3eb1b0_0
24+
- click=8.1.7=py311h06a4308_0
25+
- cryptography=42.0.5=py311hdda0065_0
26+
- exceptiongroup=1.2.0=pyhd8ed1ab_2
27+
- fastapi=0.110.2=pyhd8ed1ab_0
28+
- h11=0.14.0=py311h06a4308_0
29+
- idna=3.7=pyhd8ed1ab_0
30+
- intel-openmp=2023.1.0=hdb19cb5_46306
31+
- joblib=1.4.0=py311h06a4308_0
32+
- ld_impl_linux-64=2.38=h1181459_1
33+
- libffi=3.4.4=h6a678d5_0
34+
- libgcc-ng=11.2.0=h1234567_1
35+
- libgfortran-ng=11.2.0=h00389a5_1
36+
- libgfortran5=11.2.0=h1234567_1
37+
- libgomp=11.2.0=h1234567_1
38+
- libstdcxx-ng=11.2.0=h1234567_1
39+
- libuuid=1.41.5=h5eee18b_0
40+
- libxgboost=2.0.3=h6a678d5_0
41+
- mkl=2023.1.0=h213fc3f_46344
42+
- mkl-service=2.4.0=py311h5eee18b_1
43+
- mkl_fft=1.3.8=py311h5eee18b_0
44+
- mkl_random=1.2.4=py311hdb19cb5_0
45+
- ncurses=6.4=h6a678d5_0
46+
- numexpr=2.8.7=py311h65dcdc2_0
47+
- numpy=1.26.4=py311h08b1b3b_0
48+
- numpy-base=1.26.4=py311hf175353_0
49+
- openssl=3.0.13=h7f8727e_0
50+
- pandas=2.2.1=py311ha02d727_0
51+
- passlib=1.7.4=pyhd3eb1b0_0
52+
- pip=23.3.1=py311h06a4308_0
53+
- py-xgboost=2.0.3=py311h06a4308_0
54+
- pycparser=2.21=pyhd3eb1b0_0
55+
- pydantic=2.5.3=py311h06a4308_0
56+
- pydantic-core=2.14.6=py311hb02cf49_0
57+
- pysocks=1.7.1=py311h06a4308_0
58+
- python=3.11.9=h955ad1f_0
59+
- python-dateutil=2.8.2=pyhd3eb1b0_0
60+
- python-tzdata=2023.3=pyhd3eb1b0_0
61+
- pytz=2024.1=py311h06a4308_0
62+
- readline=8.2=h5eee18b_0
63+
- requests=2.31.0=py311h06a4308_1
64+
- scikit-learn=1.2.2=py311h6a678d5_1
65+
- scipy=1.11.4=py311h08b1b3b_0
66+
- setuptools=68.2.2=py311h06a4308_0
67+
- six=1.16.0=pyhd3eb1b0_1
68+
- sniffio=1.3.1=pyhd8ed1ab_0
69+
- soupsieve=2.5=py311h06a4308_0
70+
- sqlite=3.41.2=h5eee18b_0
71+
- starlette=0.37.2=pyhd8ed1ab_0
72+
- tbb=2021.8.0=hdb19cb5_0
73+
- threadpoolctl=2.2.0=pyh0d69192_0
74+
- tk=8.6.12=h1ccaba5_0
75+
- typing-extensions=4.11.0=hd8ed1ab_0
76+
- typing_extensions=4.11.0=pyha770c72_0
77+
- tzdata=2024a=h04d1e81_0
78+
- urllib3=2.1.0=py311h06a4308_1
79+
- uvicorn=0.20.0=py311h06a4308_0
80+
- wheel=0.41.2=py311h06a4308_0
81+
- xgboost=2.0.3=py311h06a4308_0
82+
- xz=5.4.6=h5eee18b_0
83+
- zlib=1.2.13=h5eee18b_0

theplan.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
Ideas
22

3-
3+
- RAG to retrieve known fraud
4+
- Page Scanner for suspicious redirects
5+
- model for judging website based on domain name
6+
- button for reporting fraudulent websites
7+
- measures to prevent report spam (e.g. false reports filed by a website's rivals)
48

59
System design(ig)
610

0 commit comments

Comments
 (0)