Skip to content

Commit

Permalink
Merge pull request #105 from Recipe-Project/feature/async_thumbnail_c…
Browse files Browse the repository at this point in the history
…rawling

Feature/async thumbnail crawling
  • Loading branch information
joona95 authored Aug 10, 2024
2 parents ff80e7d + 3301910 commit 3e69036
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 63 deletions.
26 changes: 26 additions & 0 deletions src/main/java/com/recipe/app/src/config/AsyncConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.recipe.app.src.config;

import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.AsyncConfigurer;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;

/**
 * Turns on Spring's asynchronous method execution and supplies the executor
 * that backs it.
 */
@EnableAsync
@Configuration
public class AsyncConfig implements AsyncConfigurer {

    /** Number of threads the pool keeps as its core size. */
    private static final int CORE_POOL_SIZE = 5;

    /** Upper bound on the number of threads in the pool. */
    private static final int MAX_POOL_SIZE = 10;

    /** Capacity of the queue configured on the executor. */
    private static final int QUEUE_CAPACITY = 100;

    /** Prefix given to the names of the threads created by the executor. */
    private static final String THREAD_NAME_PREFIX = "AsyncExecutor-";

    /**
     * Builds the thread pool executor handed to Spring for asynchronous calls.
     *
     * @return a configured and already initialized {@link ThreadPoolTaskExecutor}
     */
    @Override
    public Executor getAsyncExecutor() {
        final ThreadPoolTaskExecutor asyncPool = new ThreadPoolTaskExecutor();

        asyncPool.setCorePoolSize(CORE_POOL_SIZE);
        asyncPool.setMaxPoolSize(MAX_POOL_SIZE);
        asyncPool.setQueueCapacity(QUEUE_CAPACITY);
        asyncPool.setThreadNamePrefix(THREAD_NAME_PREFIX);

        // The executor is constructed by hand in this method, so it is
        // initialized explicitly before being handed back.
        asyncPool.initialize();
        return asyncPool;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,10 @@
import com.recipe.app.src.recipe.infra.blog.BlogRecipeRepository;
import io.github.resilience4j.circuitbreaker.annotation.CircuitBreaker;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
Expand All @@ -33,10 +28,13 @@ public class BlogRecipeClientSearchService {

private final BlogRecipeRepository blogRecipeRepository;
private final NaverFeignClient naverFeignClient;
private final BlogRecipeThumbnailCrawlingService blogRecipeThumbnailCrawlingService;

public BlogRecipeClientSearchService(BlogRecipeRepository blogRecipeRepository, NaverFeignClient naverFeignClient) {
public BlogRecipeClientSearchService(BlogRecipeRepository blogRecipeRepository, NaverFeignClient naverFeignClient,
BlogRecipeThumbnailCrawlingService blogRecipeThumbnailCrawlingService) {
this.blogRecipeRepository = blogRecipeRepository;
this.naverFeignClient = naverFeignClient;
this.blogRecipeThumbnailCrawlingService = blogRecipeThumbnailCrawlingService;
}

@CircuitBreaker(name = "recipe-blog-search", fallbackMethod = "fallback")
Expand All @@ -51,12 +49,10 @@ public List<BlogRecipe> searchNaverBlogRecipes(String keyword, int size) {
NAVER_BLOG_SEARCH_SORT,
keyword + " 레시피").toEntity();

for (BlogRecipe blogRecipe : blogRecipes) {
blogRecipe.changeThumbnail(getBlogThumbnailUrl(blogRecipe.getBlogUrl()));
}

createBlogRecipes(blogRecipes);

blogRecipeThumbnailCrawlingService.saveThumbnails(blogRecipes);

return blogRecipes.subList(0, size);
}

Expand All @@ -78,57 +74,4 @@ public void createBlogRecipes(List<BlogRecipe> blogRecipes) {
.filter(blogRecipe -> !existBlogRecipeMapByBlogUrl.containsKey(blogRecipe.getBlogUrl()))
.collect(Collectors.toList()));
}

/**
 * Chooses the thumbnail lookup to run based on the text of the blog URL.
 *
 * <p>A URL containing {@code "naver"} is handled by the Naver lookup; otherwise
 * a URL containing {@code "tistory"} is handled by the Tistory lookup.
 *
 * @param blogUrl address of the blog post
 * @return the value produced by the selected lookup, or an empty string when
 *         the URL contains neither marker
 */
private String getBlogThumbnailUrl(String blogUrl) {

    final boolean looksLikeNaver = blogUrl.contains("naver");
    if (looksLikeNaver) {
        return getNaverBlogThumbnailUrl(blogUrl);
    }

    final boolean looksLikeTistory = blogUrl.contains("tistory");
    return looksLikeTistory ? getTistoryBlogThumbnailUrl(blogUrl) : "";
}

/**
 * Looks up the {@code og:image} of a Naver blog post.
 *
 * <p>The page at {@code blogUrl} is fetched first and the {@code src} of its
 * {@code iframe#mainFrame} element is read; the document behind that frame is
 * then fetched and its {@code og:image} meta tag is returned.
 *
 * @param blogUrl address of the Naver blog post
 * @return the {@code content} of the first {@code og:image} meta tag, or an
 *         empty string when the frame or the meta tag is missing or any step fails
 */
private String getNaverBlogThumbnailUrl(String blogUrl) {

    try {
        Document outerPage = Jsoup.parse(new URL(blogUrl), 5000);

        String frameSrc = outerPage.select("iframe#mainFrame").attr("src");
        if (frameSrc.isEmpty()) {
            // Without a frame source the follow-up request would target the bare
            // "http://blog.naver.com" address, whose metadata does not describe
            // this post, so report "no thumbnail" instead.
            return "";
        }

        Document framePage = Jsoup.connect("http://blog.naver.com" + frameSrc).get();

        Elements ogImages = framePage.select("meta[property=og:image]");
        if (ogImages.isEmpty()) {
            return "";
        }
        return ogImages.get(0).attr("content");
    } catch (Exception e) {
        // Best effort: a failed crawl yields an empty thumbnail rather than an error.
        return "";
    }
}

/**
 * Looks up a thumbnail for a Tistory blog post by scanning its {@code <img>} tags.
 *
 * <p>The first image whose {@code src} is non-empty and does not contain
 * {@code "admin"} is used.
 *
 * @param blogUrl address of the Tistory blog post
 * @return the {@code src} of the first qualifying image, or an empty string when
 *         there is none or the page cannot be fetched (never {@code null})
 */
private String getTistoryBlogThumbnailUrl(String blogUrl) {

    try {
        Document page = Jsoup.connect(blogUrl).get();

        for (Element image : page.getElementsByTag("img")) {
            String src = image.attr("src");
            // An <img> without a src carries no usable address; keep looking
            // instead of settling for an empty thumbnail.
            if (!src.isEmpty() && !src.contains("admin")) {
                return src;
            }
        }

        // No qualifying image: return "" like every other "not found" path.
        return "";
    } catch (Exception e) {
        // Best effort: a failed crawl yields an empty thumbnail rather than an error.
        return "";
    }
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package com.recipe.app.src.recipe.application.blog;

import com.recipe.app.src.recipe.domain.blog.BlogRecipe;
import com.recipe.app.src.recipe.infra.blog.BlogRecipeRepository;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;

import java.net.URL;
import java.util.List;

/**
 * Crawls blog pages to find a thumbnail image for each {@link BlogRecipe} and
 * stores the result through {@link BlogRecipeRepository}.
 *
 * <p>Every lookup is best effort: when no thumbnail can be determined the empty
 * string is used, never {@code null}.
 */
@Service
public class BlogRecipeThumbnailCrawlingService {

    private final BlogRecipeRepository blogRecipeRepository;

    public BlogRecipeThumbnailCrawlingService(BlogRecipeRepository blogRecipeRepository) {
        this.blogRecipeRepository = blogRecipeRepository;
    }

    /**
     * Resolves a thumbnail for every given recipe, applies it to the recipe and
     * then saves the whole list.
     *
     * <p>The method is annotated with {@code @Async}; when it is invoked through
     * the Spring proxy it is expected to run off the caller's thread, so callers
     * should not rely on the thumbnails being set when the call returns.
     *
     * @param blogRecipes recipes whose thumbnails should be crawled and stored
     */
    @Async
    public void saveThumbnails(List<BlogRecipe> blogRecipes) {

        // Debug trace written to stdout.
        // NOTE(review): no logger is in scope in this file; consider routing this
        // through the project's logging setup.
        System.out.println("thumbnail save");

        for (BlogRecipe blogRecipe : blogRecipes) {
            blogRecipe.changeThumbnail(getBlogThumbnailUrl(blogRecipe.getBlogUrl()));
        }

        // NOTE(review): every element passed in is handed to saveAll; confirm that
        // callers only pass entities that are meant to be written.
        blogRecipeRepository.saveAll(blogRecipes);
    }

    /**
     * Chooses the thumbnail lookup to run based on the text of the blog URL.
     *
     * @param blogUrl address of the blog post; may be {@code null}
     * @return the thumbnail address found by the matching lookup, or an empty
     *         string when the URL is {@code null}, contains neither
     *         {@code "naver"} nor {@code "tistory"}, or nothing was found
     */
    public String getBlogThumbnailUrl(String blogUrl) {

        if (blogUrl == null) {
            // A recipe without a URL must not abort the whole batch with an NPE.
            return "";
        }

        if (blogUrl.contains("naver")) {
            return getNaverBlogThumbnailUrl(blogUrl);
        } else if (blogUrl.contains("tistory")) {
            return getTistoryBlogThumbnailUrl(blogUrl);
        } else {
            return "";
        }
    }

    /**
     * Looks up the {@code og:image} of a Naver blog post.
     *
     * <p>The page at {@code blogUrl} is fetched first and the {@code src} of its
     * {@code iframe#mainFrame} element is read; the document behind that frame
     * is then fetched and its {@code og:image} meta tag is returned.
     *
     * @param blogUrl address of the Naver blog post
     * @return the {@code content} of the first {@code og:image} meta tag, or an
     *         empty string when the frame or the meta tag is missing or any step fails
     */
    private String getNaverBlogThumbnailUrl(String blogUrl) {

        try {
            Document outerPage = Jsoup.parse(new URL(blogUrl), 5000);

            String frameSrc = outerPage.select("iframe#mainFrame").attr("src");
            if (frameSrc.isEmpty()) {
                // Without a frame source the follow-up request would target the
                // bare "http://blog.naver.com" address, whose metadata does not
                // describe this post, so report "no thumbnail" instead.
                return "";
            }

            Document framePage = Jsoup.connect("http://blog.naver.com" + frameSrc).get();

            Elements ogImages = framePage.select("meta[property=og:image]");
            if (ogImages.isEmpty()) {
                return "";
            }
            return ogImages.get(0).attr("content");
        } catch (Exception e) {
            // Best effort: a failed crawl yields an empty thumbnail rather than an error.
            return "";
        }
    }

    /**
     * Looks up a thumbnail for a Tistory blog post by scanning its {@code <img>} tags.
     *
     * <p>The first image whose {@code src} is non-empty and does not contain
     * {@code "admin"} is used.
     *
     * @param blogUrl address of the Tistory blog post
     * @return the {@code src} of the first qualifying image, or an empty string
     *         when there is none or the page cannot be fetched
     */
    private String getTistoryBlogThumbnailUrl(String blogUrl) {

        try {
            Document page = Jsoup.connect(blogUrl).get();

            for (Element image : page.getElementsByTag("img")) {
                String src = image.attr("src");
                // An <img> without a src carries no usable address; keep looking
                // instead of settling for an empty thumbnail.
                if (!src.isEmpty() && !src.contains("admin")) {
                    return src;
                }
            }

            // No qualifying image: return "" like every other "not found" path.
            return "";
        } catch (Exception e) {
            // Best effort: a failed crawl yields an empty thumbnail rather than an error.
            return "";
        }
    }
}

0 comments on commit 3e69036

Please sign in to comment.