Skip to content

Commit 72b05f8

Browse files
authored
as per requested by Edmund L
1 parent 7dcd1f3 commit 72b05f8

File tree

1 file changed

+214
-0
lines changed

1 file changed

+214
-0
lines changed

CFTC.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
5+
6+
#scrape cftc trader commitment report
7+
8+
9+
# In[1]:
10+
11+
12+
import requests
13+
import pandas as pd
14+
import re
15+
import os
16+
os.chdir('H:/')
17+
18+
19+
# In[2]:
20+
21+
22+
#scraping function
23+
def scrape(url, timeout=60):
    """Download *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests``. Without it a stalled connection
            would block the script indefinitely.

    Returns:
        The ``requests.Response`` for the GET request. The HTTP status
        code is not checked here.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }

    # The body is fully downloaded before get() returns (no streaming), so
    # the session and its pooled connections can be closed right away.
    with requests.Session() as session:
        session.headers.update(headers)
        return session.get(url, timeout=timeout)
33+
34+
35+
# In[3]:
36+
37+
38+
#get data
39+
# Trader category / position pairs, in the order the report prints them.
_CFTC_CATEGORIES = [
    'non_commercial_long', 'non_commercial_short', 'non_commercial_spread',
    'commercial_long', 'commercial_short',
    'total_long', 'total_short',
    'non_reportable_long', 'non_reportable_short',
]

# Columns that identify a report block and are repeated on every output row.
_CFTC_ID_COLUMNS = [
    'commodity', 'commodity_code', 'change_date',
    'date', 'contract_unit', 'open_interest',
    'change_open_interest', 'total_traders',
]

# Longer labels for two of the measures; the others keep their short name.
# An exact-match lookup is used on purpose: chaining substring replacements
# would rewrite the trailing "traders" of the percent label a second time.
_CFTC_MEASURE_LABELS = {
    'percent': 'percent_of_open_interest_for_each_type_of_traders',
    'traders': 'number_of_traders_in_each_type',
}

_CFTC_DATE_RE = re.compile(r'\d{2}/\d{2}/\d{2}')
_CFTC_UNIT_RE = re.compile(r'(?<=\().*(?=OPEN INTEREST)')
_CFTC_OPEN_INTEREST_RE = re.compile(r'(?<=OPEN INTEREST:).*')
_CFTC_FIELD_RE = re.compile(r'\S+')


def _cftc_wide_columns():
    """Return the wide-format column names in the order fields are parsed."""
    columns = ['commodity', 'commodity_code', 'date',
               'contract_unit', 'open_interest']
    columns += [name + '_commitment' for name in _CFTC_CATEGORIES]
    columns += ['change_date', 'change_open_interest']
    columns += [name + '_change' for name in _CFTC_CATEGORIES]
    columns += [name + '_percent' for name in _CFTC_CATEGORIES]
    columns += ['total_traders']
    # Only seven values are read from the trader-count row: it has no
    # non-reportable long/short entries.
    columns += [name + '_traders' for name in _CFTC_CATEGORIES[:7]]
    return columns


def _cftc_fields(line, expected):
    """Split *line* on whitespace, requiring exactly *expected* fields."""
    fields = _CFTC_FIELD_RE.findall(line)
    if len(fields) != expected:
        raise ValueError(
            'expected %d fields but found %d in line %r'
            % (expected, len(fields), line))
    return fields


def etl(response):
    """Parse the report text in *response* into a long-format DataFrame.

    The body is decoded as UTF-8 and split on ``'\\r'``. Every line that
    contains ``'CHICAGO MERCANTILE EXCHANGE'`` starts a block, and the
    remaining fields are read at fixed line offsets from that header
    (+1, +7, +9, +11, +12, +15, +17, +18). This assumes the report keeps
    that exact layout -- TODO confirm against the live page if parsing
    starts failing.

    Args:
        response: Object exposing the raw page as bytes in ``.content``
            (e.g. the value returned by ``scrape``).

    Returns:
        pandas.DataFrame with one row per block and per numeric field.
        Columns: commodity, commodity_code, change_date, date,
        contract_unit, open_interest, change_open_interest, total_traders,
        type, position, variable, value. All values are the strings
        captured from the report; no numeric conversion or whitespace
        trimming is applied.

    Raises:
        ValueError: If a data row does not hold the expected number of
            whitespace-separated fields.
        AttributeError: If an expected date / unit / open-interest pattern
            is missing from its line.
        IndexError: If a block is cut off before its last line.
    """
    # Splitting on '\r' leaves a leading '\n' on every line after the first.
    text = response.content.decode('utf-8').split('\r')

    rows = []
    # enumerate() gives each header its own position, even if two header
    # lines happen to be identical.
    for i, header in enumerate(text):
        if 'CHICAGO MERCANTILE EXCHANGE' not in header:
            continue

        row = [
            header.split(' - CHICAGO MERCANTILE EXCHANGE')[0].replace('\n', ''),
            header.split('Code-')[-1].replace('\n', ''),
            _CFTC_DATE_RE.search(text[i + 1]).group(),
            _CFTC_UNIT_RE.search(text[i + 7]).group().replace(')', ''),
            _CFTC_OPEN_INTEREST_RE.search(text[i + 7]).group(),
        ]
        row += _cftc_fields(text[i + 9], 9)            # commitments
        row.append(_CFTC_DATE_RE.search(text[i + 11]).group())
        row.append(text[i + 11].split(' ')[-1].replace(')', ''))
        row += _cftc_fields(text[i + 12], 9)           # changes
        row += _cftc_fields(text[i + 15], 9)           # percent of open interest
        row.append(text[i + 17].split(' ')[-1].replace(')', ''))
        row += _cftc_fields(text[i + 18], 7)           # number of traders
        rows.append(row)

    # create dataframe
    wide = pd.DataFrame(rows, columns=_cftc_wide_columns())

    # transform: one row per (block, numeric column)
    value_columns = [name for name in wide.columns
                     if name not in _CFTC_ID_COLUMNS]
    df = wide.melt(id_vars=_CFTC_ID_COLUMNS, value_vars=value_columns)

    # Every numeric column is named <type>_<position>_<measure>, e.g.
    # 'non_commercial_long_commitment', so one right-split isolates all
    # three parts at once.
    parts = [name.rsplit('_', 2) for name in df['variable']]
    df['type'] = [trader_type for trader_type, _, _ in parts]
    df['position'] = [position for _, position, _ in parts]
    df['variable'] = [_CFTC_MEASURE_LABELS.get(measure, measure)
                      for _, _, measure in parts]

    # change col order
    return df[_CFTC_ID_COLUMNS + ['type', 'position', 'variable', 'value']]
195+
196+
197+
# In[4]:
198+
199+
def main():
    """Fetch the CFTC report, parse it and save the result as a CSV file.

    The URL is hard-coded. The CSV is written to the current working
    directory under the name 'trader commitment report.csv', without the
    DataFrame index.
    """
    url = 'https://www.cftc.gov/dea/futures/deacmesf.htm'

    # scrape
    response = scrape(url)

    # get data: etl() parses the HTTP response returned by scrape()
    # (the previous argument, option_url, was never defined -> NameError)
    df = etl(response)

    df.to_csv('trader commitment report.csv', index=False)
210+
211+
212+
if __name__ == "__main__":
213+
main()
214+

0 commit comments

Comments
 (0)