Skip to content

Commit a17c9a6

Browse files
used qemu to test memory constraints
1 parent 9721b1b commit a17c9a6

File tree

2 files changed

+68
-1
lines changed

2 files changed

+68
-1
lines changed

.github/workflows/test.yml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ jobs:
1111
strategy:
1212
matrix:
1313
python-version: [3.7, 3.8, 3.9]
14-
os: [ubuntu-latest, windows-latest]
14+
os: [ubuntu-latest]
1515

1616
steps:
1717
- uses: actions/checkout@v2
@@ -23,8 +23,12 @@ jobs:
2323

2424
- name: Install dev-package
  run: |
    # Refresh the package index first so the install below does not hit stale mirrors.
    sudo apt-get update
    # -y: the CI runner is non-interactive, so apt-get must not wait for a confirmation prompt.
    sudo apt-get install -y qemu tree
    tree /usr
    python -m pip install --upgrade pip
    pip install -v -e .
    # Run the timing script under user-mode QEMU; -R 20M sets the size of the
    # guest virtual address space reserved for the process.
    # NOTE(review): confirm that the `qemu` package provides `qemu-i386` on the runner image.
    qemu-i386 -R 20M python time_match_strings.py
31+
2832
2933
- name: Run tests
3034
run: python -m unittest

time_match_strings.py

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,63 @@
"""Time ``string_grouper.match_strings`` on random samples of company names.

For every pair of sample sizes ``(x, y)`` taken from the grids ``X`` and ``Y``,
the script draws ``x`` and ``y`` random rows from the 'Company Name' column of
``data/sec__edgar_company_info.csv``, times one ``match_strings`` call on them
and keeps a running mean (in minutes) over ``ns`` repetitions.  The mean grid
and both size grids are written to a ``.npy`` file.
"""
import pandas as pd
import numpy as np
from string_grouper import match_strings
import random
import time
import os

# Disabled experiment: cap this process's memory on Windows via Process Governor.
# mem_limit = '1G'
# procgov = r'C:\Users\heamu\Source\Repos\process-governor\ProcessGovernor\bin\x64\Debug\procgov.exe'
# os.popen(f'{procgov} -r -m {mem_limit} -p {os.getpid()}')
# time.sleep(1)

progress = 0     # fraction of all (repetition, x, y) measurements completed
do_print = True  # when True, each timing is echoed as a nested bracketed list
companies = pd.read_csv('data/sec__edgar_company_info.csv')

# Sample-size grid for the left-hand series: start, stop (inclusive), step.
x0 = 10000
Nx = 10000
dNx = 1000
# Only used by the disabled, extended X grid below.
Nx2 = 500000
dNx2 = 50000
# Sample-size grid for the right-hand series: start, stop (inclusive), step.
y0 = 10000
Ny = 10000
dNy = 10000
ns = 10  # number of repetitions averaged per grid cell

# X = np.append(np.arange(dNx, Nx + 1, dNx), np.arange(dNx2 + dNx2, Nx2 + 1, dNx2))
X = np.arange(x0, Nx + 1, dNx)
Y = np.arange(y0, Ny + 1, dNy)

# Running mean of the runtimes (minutes), one cell per (x, y) pair.
means = np.zeros((len(X), len(Y)))


def _emit(text, end=''):
    """Print *text* immediately (flushed) when ``do_print`` is enabled."""
    if do_print:
        print(text, flush=True, end=end)


for s in range(ns):
    dgrid = []  # this repetition's runtimes: one row per x, one column per y
    _emit('[')
    for i, x in enumerate(X):
        left_df = companies['Company Name'].iloc[
            random.sample(range(len(companies)), k=int(x))
        ]
        if i > 0:
            _emit(', ', end='\n')
        dseries = []
        _emit('[')
        for j, y in enumerate(Y):
            if j > 0:
                _emit(', ')
            right_df = companies['Company Name'].iloc[
                random.sample(range(len(companies)), k=int(y))
            ]
            # perf_counter is monotonic and high-resolution, so the measured
            # interval cannot be distorted by system clock adjustments.
            t0 = time.perf_counter()
            # The result is discarded at once so it is not kept alive while
            # the next measurement runs.
            match_strings(right_df, left_df, n_blocks=(1, 1))
            t1 = time.perf_counter()
            dseries.append((t1 - t0) / 60)  # seconds -> minutes
            progress += 1.0 / (ns * len(X) * len(Y))
            # print(f'Progress {progress:.1%}', end='\x1b[1K\r')
            _emit(f'{dseries[-1]}')
            # _ = print('.', flush=True, end='') if not do_print else None
        _emit(']')
        dgrid.append(dseries)
        # _ = print(f'{i}/{len(X)}', flush=True) if not do_print else None
    _emit(']', end='\n')
    # Incremental mean: fold repetition number s (0-based) into the average.
    means = (np.asarray(dgrid) + s * means) / (s + 1)
    # NOTE(review): the original indentation of this save step was lost; it is
    # placed inside the repetition loop so partial results are kept after every
    # round.  The file written after the last round is the same either way.
    # The three arrays are stored back to back; read them with three
    # successive np.load calls on the same file handle.
    with open(f'runtime_means_x_{x0}-{Nx}_y_{y0}-{Ny}.npy', 'wb') as f:
        np.save(f, means)
        np.save(f, X)
        np.save(f, Y)
#send_me_mail()

0 commit comments

Comments
 (0)