-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathtravel_viz.py
165 lines (132 loc) · 5.29 KB
/
travel_viz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import os
import random
import urllib
import urllib.request  # `import urllib` alone does not bind the `request` submodule used below
from datetime import datetime

import flickrapi
import folium
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from folium.plugins import HeatMap
from streamlit_folium import folium_static
# Load FLICKR_API_KEY / FLICKR_API_SECRET from a local .env file into os.environ.
load_dotenv()
# set page layout
# Must be the first Streamlit call in the script: wide layout with the
# sidebar expanded on load.
st.set_page_config(
    page_title="Travel Exploration",
    page_icon="🌍",
    layout="wide",
    initial_sidebar_state="expanded",
)
@st.cache
def load_data():
    """Read the pre-cleaned travel log and parse its timestamps.

    Returns a DataFrame containing the GPS columns from ``clean_data.csv``
    plus a datetime64 ``date`` column derived from the raw ``ts`` strings.
    Cached by Streamlit so the CSV is only parsed once per session.
    """
    df = pd.read_csv("clean_data.csv")
    df["date"] = pd.to_datetime(df["ts"])
    return df
def get_pics_from_location(locations_df, size=10):
    """Fetch one random public Flickr photo URL per row of GPS coordinates.

    Parameters
    ----------
    locations_df : pandas.DataFrame
        Must contain "latitude" and "longitude" columns.
    size : int
        Upper bound on the pool of candidate photos to pick from per
        location (further capped by the API's per_page=10).

    Returns
    -------
    list[str]
        Static-Flickr image URLs. Locations that error out or return no
        photos are skipped, so the list may be shorter than locations_df.
    """
    api_key = os.getenv("FLICKR_API_KEY")
    api_secret = os.getenv("FLICKR_API_SECRET")
    flickr = flickrapi.FlickrAPI(api_key, api_secret, format="parsed-json")
    urls = []
    for _, row in locations_df.iterrows():
        try:
            photos = flickr.photos.search(
                lat=row["latitude"], lon=row["longitude"], per_page=10, pages=1
            )
            photo_list = photos["photos"]["photo"]
            # Skip locations with no results instead of indexing an empty list.
            if not photo_list:
                continue
            # Pick a random photo, clamped to the photos actually returned.
            # (The previous bound, min(size - 1, total), was off by one when
            # total < size, and could also exceed the per_page-limited list
            # length — both caused IndexError.)
            selection = random.randint(0, min(size, len(photo_list)) - 1)
            selected_photo = photo_list[selection]
            # Compute the url for the image
            url = f"https://live.staticflickr.com/{selected_photo['server']}/{selected_photo['id']}_{selected_photo['secret']}_w.jpg"
            urls.append(url)
        except Exception as e:
            # Best-effort: log and move on so one bad location does not
            # abort the whole gallery.
            print(e)
            continue
    return urls
@st.cache(show_spinner=False)
def get_file_content_as_string(path):
    """Download a single file from the demo's GitHub repo as a string.

    Parameters
    ----------
    path : str
        File path relative to the repository root.

    Returns
    -------
    str
        The file contents decoded as UTF-8.
    """
    url = (
        "https://raw.githubusercontent.com/nithishr/streamlit-data-viz-demo/main/"
        + path
    )
    # Context manager closes the HTTP response even if decoding fails
    # (the original leaked the response object).
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")
st.title("🌍 Travels Exploration")
travel_data = load_data()
# Calculate the timerange for the slider
# min/max over the raw "ts" strings works because lexicographic order
# equals chronological order for this fixed-width timestamp format.
min_ts = datetime.strptime(min(travel_data["ts"]), "%Y-%m-%d %H:%M:%S.%f")
max_ts = datetime.strptime(max(travel_data["ts"]), "%Y-%m-%d %H:%M:%S.%f")
st.sidebar.subheader("Inputs")
# Two-handle range slider: returns the selected (start, end) datetimes.
min_selection, max_selection = st.sidebar.slider(
    "Timeline", min_value=min_ts, max_value=max_ts, value=[min_ts, max_ts]
)
# Toggles for the feature selection in sidebar
# Each checkbox gates an optional (and potentially slow) section below;
# all default to unchecked so the initial render stays fast.
show_heatmap = st.sidebar.checkbox("Show Heatmap")
show_histograms = st.sidebar.checkbox("Show Histograms")
show_images = st.sidebar.checkbox("Show Images")
images_count = st.sidebar.number_input("Images to Show", value=10)
show_detailed_months = st.sidebar.checkbox("Show Detailed Split per Year")
show_code = st.sidebar.checkbox("Show Code")
# Filter Data based on selection
st.write(f"Filtering between {min_selection.date()} & {max_selection.date()}")
# Keep only rows whose parsed timestamp falls inside the slider range
# (both bounds inclusive).
travel_data = travel_data[
    (travel_data["date"] >= min_selection) & (travel_data["date"] <= max_selection)
]
st.write(f"Data Points: {len(travel_data)}")
# Plot the GPS coordinates on the map
# st.map picks up the latitude/longitude columns from the DataFrame.
st.map(travel_data)
if show_histograms:
    # Plot the histograms based on the dates of data points
    # Each groupby().count().plot() call returns a matplotlib Axes; its
    # parent Figure is handed to st.pyplot for rendering.
    years = travel_data.groupby(travel_data["date"].dt.year).count().plot(kind="bar")
    years.set_xlabel("Year of Data Points")
    hist_years = years.get_figure()
    st.subheader("Data Split by Year")
    st.pyplot(hist_years)
    # Month-of-year distribution, aggregated across all years.
    months = travel_data.groupby(travel_data["date"].dt.month).count().plot(kind="bar")
    months.set_xlabel("Month of Data Points")
    hist_months = months.get_figure()
    st.subheader("Data Split by Months")
    st.pyplot(hist_months)
    # Hour-of-day distribution.
    hours = travel_data.groupby(travel_data["date"].dt.hour).count().plot(kind="bar")
    hours.set_xlabel("Hour of Data Points")
    hist_hours = hours.get_figure()
    st.subheader("Data Split by Hours of Day")
    st.pyplot(hist_hours)
if show_detailed_months:
    # Bar chart keyed on (year, month) pairs — one bar per calendar month
    # rather than aggregating the same month across years.
    month_year = (
        travel_data.groupby([travel_data["date"].dt.year, travel_data["date"].dt.month])
        .count()
        .plot(kind="bar")
    )
    month_year.set_xlabel("Month, Year of Data Points")
    hist_month_year = month_year.get_figure()
    st.subheader("Data Split by Month, Year")
    st.pyplot(hist_month_year)
if show_heatmap:
    # Plot the heatmap using folium. It is resource intensive!
    # Set the map to center around Munich, Germany (48.1351, 11.5820)
    map_heatmap = folium.Map(location=[48.1351, 11.5820], zoom_start=11)
    # Filter the DF for columns, then remove NaNs
    heat_df = travel_data[["latitude", "longitude"]]
    heat_df = heat_df.dropna(axis=0, subset=["latitude", "longitude"])
    # List comprehension to make list of lists
    # ([[lat, lon], ...] is the point format folium's HeatMap expects).
    heat_data = [
        [row["latitude"], row["longitude"]] for index, row in heat_df.iterrows()
    ]
    # Plot it on the map
    HeatMap(heat_data).add_to(map_heatmap)
    # Display the map using the community component
    st.subheader("Heatmap")
    folium_static(map_heatmap)
if show_images:
    # Show the images from Flickr's public images
    st.subheader("Image Highlights")
    # NOTE(review): DataFrame.sample raises ValueError when images_count
    # exceeds the number of filtered rows — confirm the intended behavior.
    sample_data = travel_data.sample(n=images_count)
    urls = get_pics_from_location(sample_data, images_count)
    st.image(urls, width=200)
if show_code:
    # Render this script's own source, fetched live from the GitHub repo.
    st.code(get_file_content_as_string("travel_viz.py"))