-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
72 lines (58 loc) · 2.35 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from flask import Flask, render_template, request
from pycaret.classification import load_model, predict_model
from utils import clean_url, get_geoip_location
import pandas as pd
from urllib.parse import urlparse
# WSGI application object; the route decorators below register on it.
app = Flask(import_name=__name__)

# Load the saved PyCaret classification pipeline once, at import time,
# so every request reuses the same model object.
model = load_model(model_name="models/website_legitimacy_model")
# Human-readable names for the model's integer class labels.
# The position in the tuple is the class id (0 -> "Benign", ... 3 -> "Malware").
label_mapping = dict(enumerate(("Benign", "Phishing", "Defacement", "Malware")))
# Domains that are reported as "Benign" without consulting the model.
trusted_domains = [
    "facebook.com",
    "youtube.com",
    "google.com",
    "kaggle.com",
]
@app.route('/')
def home():
    """Serve the landing page by rendering the ``home.html`` template."""
    landing_page = render_template("home.html")
    return landing_page
def _strip_leading_www(netloc):
    """Return *netloc* with a single leading ``www.`` label removed.

    Only the prefix is stripped. Removing every ``www.`` occurrence would
    collapse a host such as ``google.www.com`` into ``google.com`` and let it
    pass the trusted-domain check.
    """
    prefix = "www."
    if netloc[:len(prefix)].lower() == prefix:
        return netloc[len(prefix):]
    return netloc


def _url_features(cleaned_url, domain):
    """Build the single-row feature mapping handed to the model."""
    return {
        'url_length': len(cleaned_url),
        'special_char_count': sum(1 for c in cleaned_url if not c.isalnum()),
        # NOTE(review): this is a substring test, so "https" anywhere in the
        # URL sets the flag. Left unchanged because the feature presumably has
        # to match how the model was trained -- confirm before tightening.
        'is_https': 1 if 'https' in cleaned_url else 0,
        'digit_count': sum(c.isdigit() for c in cleaned_url),
        'letter_count': sum(c.isalpha() for c in cleaned_url),
        'url': cleaned_url,  # Placeholder for the 'url' column
        'domain': domain,  # Placeholder for the 'domain' column
    }


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted URL and render the result page.

    Reads the ``url`` form field, normalises it with ``clean_url`` and
    extracts its host. Hosts on the ``trusted_domains`` list are reported as
    "Benign" with a confidence of 1.0 without calling the model; every other
    URL is scored with ``predict_model``. A GeoIP lookup is performed for the
    cleaned URL in both cases.

    Returns:
        The rendered ``result.html`` template, given the original URL, the
        predicted class name, the confidence score and the GeoIP information.
    """
    # The route only accepts POST, so no request.method check is needed.
    url = request.form['url']
    cleaned_url = clean_url(url)

    # Host part of the URL, without a leading "www.".
    domain = _strip_leading_www(urlparse(cleaned_url).netloc)

    # Host names are case-insensitive; the allow-list entries are lower-case.
    if domain.lower() in trusted_domains:
        predicted_class = "Benign"
        confidence_score = 1.0  # High confidence for trusted domains
    else:
        input_df = pd.DataFrame([_url_features(cleaned_url, domain)])
        prediction = predict_model(model, data=input_df)
        # Positional access: take the only row regardless of index labels.
        predicted_class = label_mapping[prediction['prediction_label'].iloc[0]]
        confidence_score = prediction['prediction_score'].iloc[0]

    # GeoIP lookup runs for trusted and model-scored URLs alike.
    geoip_info = get_geoip_location(cleaned_url)

    return render_template(
        "result.html",
        url=url,
        predicted_class=predicted_class,
        confidence_score=confidence_score,
        geoip_info=geoip_info,
    )
if __name__ == '__main__':
    # Imported here because it is only needed when run as a script.
    import os

    # Flask's debug mode enables the interactive debugger, which can execute
    # arbitrary code from the browser. It is therefore opt-in through the
    # FLASK_DEBUG environment variable instead of being always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled)