@@ -25,23 +25,25 @@ def awesome_cossim_topn(
25
25
"""
26
26
This function will return a matrix C in CSR format, where
27
27
C = [sorted top n results > lower_bound for each row of A * B].
28
- If return_best_topn=True it will also return best_topn (the
29
- true maximum number of elements > lower_bound per row of A * B).
28
+ If return_best_topn=True then best_topn
29
+ (the true maximum number of elements > lower_bound per row of A * B)
30
+ will also be returned in a tuple together with C as (C, best_topn).
30
31
31
32
Input:
32
33
A and B: two CSR matrices
33
- ntop: n top results
34
- lower_bound: a threshold that the element of A*B must greater than
35
- use_threads: use multi-thread or not
34
+ ntop: top n results
35
+ lower_bound: a threshold that the element of A*B must be greater than
36
+ use_threads: whether to use multi-threading or not
36
37
n_jobs: number of threads, must be >= 1
37
- ntop_is_flexible: if True, memory management will be handed over to C/C++ if
38
- python's attempt at allocating memory fails.
39
- mem_manager_is_C: (this is mainly for testing purposes) if True, will force
40
- memory management to be handed over to C/C++. Should be
41
- used only when ntop >= number of columns of B or
42
- ntop_is_flexible=True. Defaults to False.
43
- return_best_topn: if True, will return best_topn together with C as a tuple:
44
- (C, best_topn)
38
+ ntop_is_flexible: (default: False) if True, memory management will be handed
39
+ over to C/C++ whenever python's attempt at allocating
40
+ memory fails.
41
+ mem_manager_is_C: (default: False) this is mainly for testing purposes. If
42
+ True, will force memory management to be handed over to
43
+ C/C++. Should be used only when ntop >= number of columns
44
+ of B or ntop_is_flexible=True.
45
+ return_best_topn: (default: False) if True, will return best_topn together
46
+ with C as a tuple: (C, best_topn)
45
47
46
48
Output:
47
49
C: result matrix (returned alone, if return_best_topn=False)
@@ -80,11 +82,13 @@ def awesome_cossim_topn(
80
82
return output
81
83
82
84
# filled matrices from here on
83
- indptr = np .empty (M + 1 , dtype = idx_dtype )
85
+ indptr = np .empty (M + 1 , dtype = idx_dtype )
84
86
try :
85
87
indices = np .empty (nnz_max , dtype = idx_dtype )
86
88
data = np .empty (nnz_max , dtype = A .dtype )
89
+
87
90
if mem_manager_is_C : raise MemoryError # This is mainly for testing purposes
91
+
88
92
except MemoryError :
89
93
# if mem_manager_is_C: print('Exception raised! Continuing ...', flush=True)
90
94
if ntop_is_flexible or ntop >= N :
@@ -107,7 +111,6 @@ def awesome_cossim_topn(
107
111
lower_bound ,
108
112
indptr
109
113
)
110
-
111
114
else :
112
115
113
116
indices , data , best_topn = ct_thread .sparse_dot_free_threaded (
@@ -120,14 +123,19 @@ def awesome_cossim_topn(
120
123
lower_bound ,
121
124
indptr , n_jobs
122
125
)
123
-
124
126
else :
127
+
125
128
if mem_manager_is_C :
126
- raise Exception ('When mem_manager_is_C=True, set ntop >= N, or set ntop_is_flexible=True' )
129
+ raise Exception (
130
+ 'When mem_manager_is_C=True, set ntop >= B.shape[1], or set ntop_is_flexible=True'
131
+ )
127
132
else :
128
- raise Exception ('Not enough memory! Data array is too large. Try reducing the value of ntop.' )
129
-
133
+ raise Exception (
134
+ 'Not enough memory! Data array is too large. Try reducing the value of ntop.'
135
+ 'or set ntop_is_flexible=True'
136
+ )
130
137
else :
138
+ # no exception was raised, so use the old function (it is expected to be the fastest)
131
139
132
140
best_topn_arr = np .full (1 , 0 , dtype = idx_dtype )
133
141
@@ -144,7 +152,6 @@ def awesome_cossim_topn(
144
152
lower_bound ,
145
153
indptr , indices , data , best_topn_arr
146
154
)
147
-
148
155
else :
149
156
if n_jobs < 1 :
150
157
err_str = 'Whenever you select the multi-thread mode, n_job must be greater than or equal to 1!'
@@ -161,9 +168,9 @@ def awesome_cossim_topn(
161
168
lower_bound ,
162
169
indptr , indices , data , best_topn_arr , n_jobs
163
170
)
164
-
165
171
best_topn = best_topn_arr [0 ]
166
172
173
+ # prepare and return the output:
167
174
output = csr_matrix ((data , indices , indptr ), shape = (M , N ))
168
175
if return_best_topn :
169
176
return output , best_topn
0 commit comments