File tree 1 file changed +23
-1
lines changed
1 file changed +23
-1
lines changed Original file line number Diff line number Diff line change @@ -113,7 +113,7 @@ public function getMainContent(): string
113
113
}
114
114
115
115
if (isset ($ nodes [0 ])) {
116
- $ output = preg_replace ( ' /\s+/ ' , ' ' , $ nodes [0 ]->nodeValue );
116
+ $ output = $ this -> processMainContent ( $ nodes [0 ]->nodeValue );
117
117
}
118
118
}
119
119
} catch (Throwable $ e ) {
@@ -132,4 +132,26 @@ public function getMainContent(): string
132
132
133
133
return $ output ;
134
134
}
135
+
136
/**
 * Normalise and size-limit text extracted from the page DOM.
 *
 * Collapses every run of whitespace into a single space, trims both ends,
 * then truncates the result so that its byte length does not exceed the
 * configured `content_cutoff_bytes` minus a fixed reserve kept for other
 * fields. Truncation is multibyte-safe: a character is never cut in half.
 *
 * @param string|null $content DOM node content (a node value may be null)
 * @return string Cleaned content that fits the computed byte budget
 */
private function processMainContent($content): string
{
    // Bytes held back from content_cutoff_bytes to allow room for other fields
    $reservedBytes = 20000;

    // Treat a null node value as empty text so the string functions below
    // never receive null.
    $content = (string) $content;

    // Clean up the DOM content. preg_replace() returns null on a PCRE error,
    // in which case the un-collapsed text is kept instead of being lost.
    $collapsed = preg_replace('/\s+/', ' ', $content);
    $content = trim($collapsed ?? $content);

    $limit = $this->config()->get('content_cutoff_bytes');

    // Without a usable numeric limit there is nothing to truncate against.
    // Subtracting the reserve from null would give a negative cutoff, which
    // mb_strcut() interprets as "drop that many bytes from the end".
    if (!is_numeric($limit)) {
        return $content;
    }

    // Clamp at zero: a limit smaller than the reserve leaves no room for
    // content and must not turn into a negative length.
    $cutoff = max(0, (int) $limit - $reservedBytes);

    // If content is still too large, truncate it on a character boundary.
    // The encoding is named explicitly because DOM text is UTF-8 regardless
    // of the configured mbstring internal encoding.
    if (strlen($content) > $cutoff) {
        $content = mb_strcut($content, 0, $cutoff, 'UTF-8');
    }

    return $content;
}
135
157
}
You can’t perform that action at this time.
0 commit comments