diff --git a/.changelog/current/2825-add-browserless-support.md b/.changelog/current/2825-add-browserless-support.md new file mode 100644 index 000000000..c920a806a --- /dev/null +++ b/.changelog/current/2825-add-browserless-support.md @@ -0,0 +1,3 @@ +# Added + +- Added support for browserless diff --git a/docs/dev/api/0.1.2/objects.yaml b/docs/dev/api/0.1.2/objects.yaml index 24f1b1047..7703543c5 100644 --- a/docs/dev/api/0.1.2/objects.yaml +++ b/docs/dev/api/0.1.2/objects.yaml @@ -48,6 +48,18 @@ VisibleInfoBlocks: example: false description: Show the list of tools in the UI +BrowserlessConfig: + type: object + properties: + url: + type: string + example: http://localhost:3000 + nullable: true + token: + type: string + example: ABCD + nullable: true + Config: type: object description: An object describing the configuration of the web app @@ -66,6 +78,8 @@ Config: description: True, if the user wished to print the recipe images with the rest of the recipes visibleInfoBlocks: $ref: "#/VisibleInfoBlocks" + browserless_config: + $ref: "#/BrowserlessConfig" Error: type: object diff --git a/docs/user/assets/settings-browserless.png b/docs/user/assets/settings-browserless.png new file mode 100644 index 000000000..96bbb305c Binary files /dev/null and b/docs/user/assets/settings-browserless.png differ diff --git a/docs/user/index.md b/docs/user/index.md index 929ece30e..e2571cf3a 100644 --- a/docs/user/index.md +++ b/docs/user/index.md @@ -98,3 +98,17 @@ Currently, the only way to share recipes is by sharing the Nextcloud folder that ### Public Sharing At the moment it is not possible to share a public link to a recipe. + +## Browserless configuration + +By default, cookbook fetches recipe pages via code. Some websites detect this (bot detection) and block the request, which makes it impossible to import the recipe. +To prevent this you can configure browserless. 
Browserless is a service that downloads a webpage in a more user-like way and thus bypasses many bot-detection mechanisms on websites. + +To set this up you should supply a URL and a token. + +Cookbook settings - Browserless + +You can create an account at https://www.browserless.io/ or set up a self-hosted instance. + +For the URL, be sure to supply the REST API URL, see https://docs.browserless.io/overview/connection-urls. +For the Amsterdam region this would be: https://production-ams.browserless.io \ No newline at end of file diff --git a/lib/Controller/Implementation/ConfigImplementation.php b/lib/Controller/Implementation/ConfigImplementation.php index 49a0a2f63..1998b2e12 100644 --- a/lib/Controller/Implementation/ConfigImplementation.php +++ b/lib/Controller/Implementation/ConfigImplementation.php @@ -32,6 +32,7 @@ public function __construct( } protected const KEY_VISIBLE_INFO_BLOCKS = 'visibleInfoBlocks'; + protected const KEY_BROWSERLESS_CONFIG = 'browserless_config'; /** * Get the current configuration of the app @@ -46,6 +47,7 @@ public function list() { 'update_interval' => $this->dbCacheService->getSearchIndexUpdateInterval(), 'print_image' => $this->service->getPrintImage(), self::KEY_VISIBLE_INFO_BLOCKS => $this->service->getVisibleInfoBlocks(), + self::KEY_BROWSERLESS_CONFIG => $this->service->getBrowserlessConfig(), ], Http::STATUS_OK); } @@ -79,6 +81,10 @@ public function config() { $this->service->setVisibleInfoBlocks($data[self::KEY_VISIBLE_INFO_BLOCKS]); } + if (isset($data[self::KEY_BROWSERLESS_CONFIG])) { + $this->service->setBrowserlessConfig($data[self::KEY_BROWSERLESS_CONFIG]); + } + $this->dbCacheService->triggerCheck(); return new JSONResponse('OK', Http::STATUS_OK); diff --git a/lib/Helper/UserConfigHelper.php b/lib/Helper/UserConfigHelper.php index a4b2efe51..6a4c10bbf 100644 --- a/lib/Helper/UserConfigHelper.php +++ b/lib/Helper/UserConfigHelper.php @@ -41,6 +41,7 @@ public function __construct( protected const KEY_PRINT_IMAGE = 
'print_image'; protected const KEY_VISIBLE_INFO_BLOCKS = 'visible_info_blocks'; protected const KEY_FOLDER = 'folder'; + protected const KEY_BROWSERLESS_CONFIG = 'browserless_config'; /** * Checks if the user is logged in and the configuration can be obtained at all @@ -226,4 +227,33 @@ public function getFolderName(): string { public function setFolderName(string $value): void { $this->setRawValue(self::KEY_FOLDER, $value); } + + /** + * Gets the browserless config from the configuration + * + * @return array keys: url and token, values: url and token + * @throws UserNotLoggedInException if no user is logged in + */ + public function getBrowserlessConfig(): array { + $rawValue = $this->getRawValue(self::KEY_BROWSERLESS_CONFIG); + + if ($rawValue === '') { + return [ + 'url' => null, + 'token' => null, + ]; + } + + return json_decode($rawValue, true); + } + + /** + * Sets the browserless config in the configuration + * + * @param array keys: url and token, values: url and token + * @throws UserNotLoggedInException if no user is logged in + */ + public function setBrowserlessConfig(array $data): void { + $this->setRawValue(self::KEY_BROWSERLESS_CONFIG, json_encode($data)); + } } diff --git a/lib/Service/HtmlDownloadService.php b/lib/Service/HtmlDownloadService.php index 5ce2013ef..1b00aab86 100644 --- a/lib/Service/HtmlDownloadService.php +++ b/lib/Service/HtmlDownloadService.php @@ -13,6 +13,7 @@ use OCA\Cookbook\Helper\HTMLFilter\HtmlEncodingFilter; use OCA\Cookbook\Helper\HTMLFilter\HtmlEntityDecodeFilter; use OCA\Cookbook\Helper\HtmlToDomParser; +use OCA\Cookbook\Helper\UserConfigHelper; use OCP\IL10N; use Psr\Log\LoggerInterface; @@ -44,6 +45,9 @@ class HtmlDownloadService { /** @var DownloadEncodingHelper */ private $downloadEncodingHelper; + /** @var UserConfigHelper */ + private $userConfigHelper; + /** * @var DOMDocument */ @@ -58,6 +62,7 @@ public function __construct( DownloadHelper $downloadHelper, EncodingGuessingHelper $encodingGuesser, 
DownloadEncodingHelper $downloadEncodingHelper, + UserConfigHelper $userConfigHelper, ) { $this->htmlFilters = [ $htmlEntityDecodeFilter, @@ -69,6 +74,7 @@ public function __construct( $this->downloadHelper = $downloadHelper; $this->encodingGuesser = $encodingGuesser; $this->downloadEncodingHelper = $downloadEncodingHelper; + $this->userConfigHelper = $userConfigHelper; } /** @@ -82,7 +88,16 @@ public function __construct( * @throws ImportException If obtaining of the URL was not possible */ public function downloadRecipe(string $url): int { - $html = $this->fetchHtmlPage($url); + $browserlessConfig = $this->userConfigHelper->getBrowserlessConfig(); + + // Check if a browserless configuration is available + if (!empty($browserlessConfig['url']) && !empty($browserlessConfig['token'])) { + // Use Browserless API if the url and token are set + $html = $this->fetchHtmlPageUsingBrowserless($url); + } else { + // Otherwise, use the standard method + $html = $this->fetchHtmlPage($url); + } // Filter the HTML code /** @var AbstractHtmlFilter $filter */ @@ -104,6 +119,61 @@ public function getDom(): ?DOMDocument { return $this->dom; } + /** + * Fetch an HTML page from Browserless.io or self hosted Browserless (rendered HTML) + * + * @param string $url The URL of the page to fetch + * + * @throws ImportException If the given URL was not fetched or parsed + * + * @return string The rendered HTML content as a plain string + */ + private function fetchHtmlPageUsingBrowserless(string $url): string { + // Get the browserless config from configuration or setting + $browserlessConfig = $this->userConfigHelper->getBrowserlessConfig(); + $browserlessAddress = $browserlessConfig['url']; + $browserlessToken = $browserlessConfig['token']; + + if (empty($browserlessAddress)) { + // Handle the case where Browserless address is not configured + $this->logger->error('Browserless address is not set.'); + throw new ImportException($this->l->t('Browserless address is not configured.')); + } + 
+ if (empty($browserlessToken)) { + // Handle the case where Browserless token is not configured + $this->logger->error('Browserless token is not set.'); + throw new ImportException($this->l->t('Browserless token is not configured.')); + } + + // API endpoint for Browserless + $apiEndpoint = $browserlessAddress . '/chromium/content?token=' . $browserlessToken; + + $langCode = $this->l->getLocaleCode(); + $langCode = str_replace('_', '-', $langCode); + + // Prepare the data to be sent in the POST request + $data = json_encode([ + 'url' => $url, + 'userAgent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0', + 'setExtraHTTPHeaders' => [ + 'Accept-Language' => "$langCode,en;q=0.5", + ], + ]); + + $opt = [ + CURLOPT_USERAGENT => 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0', + CURLOPT_POSTFIELDS => $data, + CURLOPT_CUSTOMREQUEST => 'POST', + ]; + + $headers = [ + 'Content-Type: application/json', + ]; + + return $this->fetchContent($apiEndpoint, $opt, $headers); + } + /** * Fetch an HTML page from the internet * @@ -143,8 +213,12 @@ private function fetchHtmlPage(string $url): string { 'TE: trailers' ]; + return $this->fetchContent($url, $opt, $headers); + } + + private function fetchContent(string $url, array $options, array $headers): string { try { - $this->downloadHelper->downloadFile($url, $opt, $headers); + $this->downloadHelper->downloadFile($url, $options, $headers); } catch (NoDownloadWasCarriedOutException $ex) { throw new ImportException($this->l->t('Exception while downloading recipe from %s.', [$url]), 0, $ex); } diff --git a/lib/Service/RecipeService.php b/lib/Service/RecipeService.php index c26ed514a..ea9f90a9c 100755 --- a/lib/Service/RecipeService.php +++ b/lib/Service/RecipeService.php @@ -239,7 +239,6 @@ public function addRecipe($json, $importedHtml = null) { $recipe_folder->move($new_path); } - } else { // This is a new recipe, create it $json['dateCreated'] = $now; @@ -290,7 +289,6 @@ 
public function addRecipe($json, $importedHtml = null) { $this->logger->warning('Failed to download an image using curl. Falling back to PHP default behavior.'); $full_image_data = file_get_contents($json['image']); } - } else { // The image is a local path try { @@ -301,7 +299,6 @@ public function addRecipe($json, $importedHtml = null) { } } } - } else { // The image field was empty, remove images in the recipe folder $this->imageService->dropImage($recipe_folder); @@ -419,7 +416,6 @@ private function migrateFolderStructure() { $recipe_folder = $user_folder->newFolder($recipe_name); $node->move($recipe_folder->getPath() . '/recipe.json'); - } elseif ($node instanceof Folder && strpos($node->getName(), '.json')) { // Rename folders with .json extensions (this was likely caused by a migration bug) $node->move(str_replace('.json', '', $node->getPath())); @@ -567,6 +563,22 @@ public function getVisibleInfoBlocks(): array { return $this->userConfigHelper->getVisibleInfoBlocks(); } + /** + * Get browserless configuration + * @return array keys: url and token, values: url and token + */ + public function getBrowserlessConfig(): array { + return $this->userConfigHelper->getBrowserlessConfig(); + } + + /** + * Sets browserless configuration. + * @param array keys: url and token, values: url and token + */ + public function setBrowserlessConfig(array $data) { + $this->userConfigHelper->setBrowserlessConfig($data); + } + /** * Get recipe file contents as an array */ diff --git a/src/components/Modals/SettingsDialog.vue b/src/components/Modals/SettingsDialog.vue index 7d96821d0..e81623e6d 100644 --- a/src/components/Modals/SettingsDialog.vue +++ b/src/components/Modals/SettingsDialog.vue @@ -157,6 +157,48 @@ + +
+
    +
  • + + +
  • +
  • + + +
  • +
+
+
} */ const updateInterval = ref(0); +/** + * @type {import('vue').Ref} + */ +const browserlessUrl = ref(''); +/** + * @type {import('vue').Ref} + */ +const browserlessToken = ref(''); /** * @type {import('vue').Ref} */ @@ -384,6 +434,48 @@ const pickRecipeFolder = () => { }); }; +watch( + () => browserlessUrl.value, + async (newVal, oldVal) => { + if (!writeChanges.value) { + return; + } + try { + await api.config.browserlessConfig.update({ + url: newVal, + token: browserlessToken.value, + }); + await store.dispatch('refreshConfig'); + } catch { + await showSimpleAlertModal( + t('cookbook', 'Could not save Browserless url'), + ); + browserlessUrl.value = oldVal; // Revert if save fails + } + }, +); + +watch( + () => browserlessToken.value, + async (newVal, oldVal) => { + if (!writeChanges.value) { + return; + } + try { + await api.config.browserlessConfig.update({ + token: newVal, + url: browserlessUrl.value, + }); + await store.dispatch('refreshConfig'); + } catch { + await showSimpleAlertModal( + t('cookbook', 'Could not save Browserless token'), + ); + browserlessToken.value = oldVal; // Revert if save fails + } + }, +); + /** * Reindex all recipes */ @@ -435,6 +527,8 @@ const handleShowSettings = () => { store.state.localSettings.showFiltersInRecipeList; updateInterval.value = config.update_interval; recipeFolder.value = config.folder; + browserlessUrl.value = config.browserless_config.url; + browserlessToken.value = config.browserless_config.token; nextTick(() => { writeChanges.value = true; @@ -470,4 +564,8 @@ export default { display: block; width: 100%; } + +#settings-section_settings-browserless-config input { + width: auto; +} diff --git a/src/js/api-interface.js b/src/js/api-interface.js index 9150bd61f..969371e96 100644 --- a/src/js/api-interface.js +++ b/src/js/api-interface.js @@ -108,6 +108,10 @@ function updateVisibleInfoBlocks(visibleInfoBlocks) { return instance.post(`${baseUrl}/config`, { visibleInfoBlocks }); } +function 
updateBrowserlessConfig(data) { + return instance.post(`${baseUrl}/config`, { browserless_config: data }); +} + function reindex() { return instance.post(`${baseUrl}/reindex`); } @@ -146,5 +150,8 @@ export default { visibleInfoBlocks: { update: updateVisibleInfoBlocks, }, + browserlessConfig: { + update: updateBrowserlessConfig, + }, }, }; diff --git a/tests/Unit/Controller/Implementation/ConfigImplementationTest.php b/tests/Unit/Controller/Implementation/ConfigImplementationTest.php index e7ed49d68..c376964d3 100644 --- a/tests/Unit/Controller/Implementation/ConfigImplementationTest.php +++ b/tests/Unit/Controller/Implementation/ConfigImplementationTest.php @@ -89,6 +89,7 @@ public function testList(): void { 'update_interval' => $interval, 'print_image' => $printImage, 'visibleInfoBlocks' => [], + 'browserless_config' => [] ]; $this->userFolder->method('getPath')->willReturn($folder); @@ -111,8 +112,9 @@ public function testList(): void { * @param mixed $interval * @param mixed $printImage * @param mixed $visibleInfoBlocks + * @param mixed $browserlessConfig */ - public function testConfig($data, $folderPath, $interval, $printImage, $visibleInfoBlocks): void { + public function testConfig($data, $folderPath, $interval, $printImage, $visibleInfoBlocks, $browserlessConfig): void { $this->restParser->method('getParameters')->willReturn($data); $this->dbCacheService->expects($this->once())->method('triggerCheck'); @@ -143,6 +145,12 @@ public function testConfig($data, $folderPath, $interval, $printImage, $visibleI $this->recipeService->expects($this->once())->method('setVisibleInfoBlocks')->with($visibleInfoBlocks); } + if (is_null($browserlessConfig)) { + $this->recipeService->expects($this->never())->method('setBrowserlessConfig'); + } else { + $this->recipeService->expects($this->once())->method('setBrowserlessConfig')->with($browserlessConfig); + } + /** * @var JSONResponse $response */ @@ -154,20 +162,52 @@ public function testConfig($data, $folderPath, 
$interval, $printImage, $visibleI public static function dataProviderConfig() { return [ 'noChange' => [ - [], null, null, null, null + [], + null, + null, + null, + null, + null ], 'changeFolder' => [ - ['folder' => '/path/to/whatever'], '/path/to/whatever', null, null, null + ['folder' => '/path/to/whatever'], + '/path/to/whatever', + null, + null, + null, + null ], 'changeinterval' => [ - ['update_interval' => 15], null, 15, null, null + ['update_interval' => 15], + null, + 15, + null, + null, + null ], 'changePrint' => [ - ['print_image' => true], null, null, true, null + ['print_image' => true], + null, + null, + true, + null, + null ], 'changeVisibleBlocks' => [ ['visibleInfoBlocks' => ['cooking-time' => true, 'preparation-time' => true]], - null, null, null, ['cooking-time' => true, 'preparation-time' => true] + null, + null, + null, + ['cooking-time' => true, 'preparation-time' => true], + null + ], + 'browserlessConfig' => [ + ['browserless_config' => ['url' => 'https://something.com', 'token' => '123456789']], + null, + null, + null, + null, + ['url' => 'https://something.com', 'token' => '123456789'] ], 'changeAll' => [ [ @@ -175,7 +215,13 @@ public static function dataProviderConfig() { 'update_interval' => 12, 'print_image' => false, 'visibleInfoBlocks' => ['cooking-time' => true, 'preparation-time' => true], - ], '/my/custom/path', 12, false, ['cooking-time' => true, 'preparation-time' => true] + 'browserless_config' => ['url' => 'https://something.com', 'token' => '123456789'] + ], + '/my/custom/path', + 12, + false, + ['cooking-time' => true, 'preparation-time' => true], + ['url' => 'https://something.com', 'token' => '123456789'] ], ]; } diff --git a/tests/Unit/Helper/UserConfigHelperTest.php b/tests/Unit/Helper/UserConfigHelperTest.php index 9d2dab2b9..cf9994e18 100644 --- a/tests/Unit/Helper/UserConfigHelperTest.php +++ b/tests/Unit/Helper/UserConfigHelperTest.php @@ -162,4 +162,28 @@ public function testNoUser() { 
$this->expectException(UserNotLoggedInException::class); $this->dut->getFolderName(); } + + public function testGetBrowserlessConfig() { + $this->config->expects($this->once())->method('setUserValue')->with( + $this->userId, 'cookbook', 'browserless_config', + json_encode([ + 'url' => 'https://example.com', + 'token' => 'token', + ]) + ); + $this->dut->setBrowserlessConfig([ + 'url' => 'https://example.com', + 'token' => 'token', + ]); + + $this->config->expects($this->once())->method('getUserValue') + ->with($this->userId, 'cookbook', 'browserless_config') + ->willReturn(json_encode([ + 'url' => 'https://example.com', + 'token' => 'token', + ])); + + $this->assertEquals(['url' => 'https://example.com', 'token' => 'token'], $this->dut->getBrowserlessConfig()); + + } } diff --git a/tests/Unit/Service/HtmlDownloadServiceTest.php b/tests/Unit/Service/HtmlDownloadServiceTest.php index 195f6005c..e63157c3d 100644 --- a/tests/Unit/Service/HtmlDownloadServiceTest.php +++ b/tests/Unit/Service/HtmlDownloadServiceTest.php @@ -12,6 +12,7 @@ use OCA\Cookbook\Helper\HTMLFilter\HtmlEncodingFilter; use OCA\Cookbook\Helper\HTMLFilter\HtmlEntityDecodeFilter; use OCA\Cookbook\Helper\HtmlToDomParser; +use OCA\Cookbook\Helper\UserConfigHelper; use OCA\Cookbook\Service\HtmlDownloadService; use OCP\IL10N; use PHPUnit\Framework\MockObject\MockObject; @@ -47,6 +48,9 @@ class HtmlDownloadServiceTest extends TestCase { /** @var DownloadEncodingHelper|MockObject */ private $downloadEncodingHelper; + /** @var \OCA\Cookbook\Helper\UserConfigHelper|MockObject */ + private $userConfigHelper; + /** * @var HtmlDownloadService */ @@ -73,10 +77,19 @@ public function setUp(): void { $this->downloadHelper = $this->createMock(DownloadHelper::class); $this->encodingGuesser = $this->createMock(EncodingGuessingHelper::class); $this->downloadEncodingHelper = $this->createMock(DownloadEncodingHelper::class); + $this->userConfigHelper = $this->createMock(UserConfigHelper::class); $this->sut = new 
HtmlDownloadService( - $this->htmlEntityDecodeFilter, $this->htmlEncodingFilter, $this->il10n, $logger, $this->htmlParser, - $this->downloadHelper, $this->encodingGuesser, $this->downloadEncodingHelper); + $this->htmlEntityDecodeFilter, + $this->htmlEncodingFilter, + $this->il10n, + $logger, + $this->htmlParser, + $this->downloadHelper, + $this->encodingGuesser, + $this->downloadEncodingHelper, + $this->userConfigHelper + ); } public function testDownloadInvalidUrl() { @@ -96,7 +109,10 @@ public function testDownloadFailing() { public static function dpBadStatus() { return [ - [180], [199], [300], [404] + [180], + [199], + [300], + [404] ]; } @@ -122,7 +138,7 @@ public function testDownload() { $encoding = 'utf-8'; $this->downloadHelper->expects($this->once()) - ->method('downloadFile'); + ->method('downloadFile')->with($url, $this->anything(), $this->anything()); $this->downloadHelper->method('getStatus')->willReturn(200); $this->downloadHelper->method('getContent')->willReturn($content); $this->downloadHelper->method('getContentType')->willReturn($contentType); @@ -178,4 +194,46 @@ public function testDownloadWithoutEncoding() { $this->assertSame($dom, $this->sut->getDom()); } + + public function testDownloadWithBrowserless() { + $url = 'http://example.com'; + $content = 'The content of the html file'; + $dom = $this->createStub(DOMDocument::class); + $state = 12345; + $contentType = 'The content type'; + $encoding = 'utf-8'; + $browserlessUrl = 'http://browserless.url/chromium/content?token=token'; + + $this->downloadHelper->expects($this->once()) + ->method('downloadFile')->with($browserlessUrl, $this->anything(), $this->anything()); + + $this->il10n->method('getLocaleCode')->willReturn('en-US'); + $this->userConfigHelper->method('getBrowserlessConfig')->willReturn([ + 'url' => 'http://browserless.url', + 'token' => 'token', + ]); + + $this->downloadHelper->method('getStatus')->willReturn(200); + 
$this->downloadHelper->method('getContent')->willReturn($content); + $this->downloadHelper->method('getContentType')->willReturn($contentType); + + $this->encodingGuesser->method('guessEncoding') + ->with($content, $contentType) + ->willReturn($encoding); + $this->downloadEncodingHelper->method('encodeToUTF8') + ->with($content, $encoding)->willReturnArgument(0); + + $this->htmlParser->expects($this->once())->method('loadHtmlString') + ->with( + $this->anything(), + $this->equalTo($url), + $this->equalTo($content) + )->willReturn($dom); + $this->htmlParser->method('getState')->willReturn($state); + + $ret = $this->sut->downloadRecipe($url); + $this->assertEquals($state, $ret); + + $this->assertSame($dom, $this->sut->getDom()); + } }