-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathheatmap.py
More file actions
114 lines (85 loc) · 2.89 KB
/
heatmap.py
File metadata and controls
114 lines (85 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import datetime as dt
import pandas as pd
import bs4 as bs
import pickle
import requests
import os
import pandas_datareader.data as web
import yfinance as yf
style.use('ggplot')
# Extract Current top 50 Stocks which are part of Nifty50
def save_nifty50_tickers():
resp = requests.get('https://en.wikipedia.org/wiki/NIFTY_50')
soup = bs.BeautifulSoup(resp.text,'lxml')
table = soup.find('table', {'id':'constituents'})
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[1].text
tickers.append(ticker)
with open('NIFTY_50.pickle','wb') as f:
pickle.dump(tickers, f)
#print(tickers)
return tickers
#Tickers:- Conatains all the symbols of the stocks
#Get 1 min data for each symbol within a period of 7days
def get_data_from_yahoo(reload_nifty50 = False):
if reload_nifty50:
tickers = save_nifty50_tickers()
else:
with open('NIFTY_50.pickle', 'rb') as f:
tickers = pickle.load(f)
if not os.path.exists('stock_dfs'):
os.makedirs('stock_dfs')
# start = dt.datetime(2010,1,1)
# end = dt.datetime.now()
for ticker in tickers:
# if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
df = yf.download(ticker, period = '7d',interval = '1m')
df.to_csv('stock_dfs/{}.csv'.format(ticker.replace('\n','')))
# else:
# print('Already have {}'.format(ticker))
# Join the Adj Close columns of all the csv file into single dataframe replace the name with ticker
def compile_data():
get_data_from_yahoo(True)
with open('NIFTY_50.pickle', 'rb') as f:
tickers = pickle.load(f)
main_df = pd.DataFrame()
get_data_from_yahoo()
for count, ticker in enumerate(tickers):
df = pd.read_csv('stock_dfs/{}.csv'.format(ticker.replace('\n','')))
df.set_index('Datetime', inplace=True)
df.rename(columns={'Adj Close': ticker.replace('\n','')}, inplace=True)
df.drop(['Open','High','Low','Close','Volume'], 1, inplace = True)
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df, how='outer')
#print(main_df.head())
main_df.to_csv('nifty50_joined.csv')
# Vizualising the corelation on returns of the stocks to see if they follow normal distribution
def visualize_data():
compile_data()
df = pd.read_csv('nifty50_joined.csv')
df.set_index('Datetime', inplace = True)
df_corr = df.pct_change().corr()
data = df_corr.values
fig = plt.figure(figsize = (15, 10))
ax = fig.add_subplot(1,1,1)
heatmap = ax.pcolor(data, cmap = plt.cm.RdYlBu)
fig.colorbar(heatmap)
ax.set_xticks(np.arange(data.shape[0]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
column_labels = df_corr.columns
row_labels = df_corr.index
ax.set_xticklabels(column_labels)
ax.set_yticklabels(row_labels)
plt.xticks(rotation = 90)
heatmap.set_clim(-1,1)
plt.tight_layout()
fig.savefig('heatmap.png')
visualize_data()