|
|
@@ -25,82 +25,85 @@ import java.util.List;
|
|
|
@Service
|
|
|
public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
|
|
|
|
|
|
- @Autowired
|
|
|
- private PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
-
|
|
|
- @Override
|
|
|
- public void start(PopularFeelings popular) {
|
|
|
- String[] urls = popular.getStartUrls().toArray(new String[]{});
|
|
|
-
|
|
|
- Spider spider = Spider.create(new PopularFeelingsProcessor(popular, this.popularFeelingsPageMapper));
|
|
|
- List<SpiderListener> listListeners = new ArrayList<>();
|
|
|
- listListeners.add(new SpiderListener() {
|
|
|
- @Override
|
|
|
- public void onSuccess(Request request) {
|
|
|
- log.info("onSuccess>>>>>>{}", request.getUrl());
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void onError(Request request) {
|
|
|
- log.info("onError>>>>>>{}", request.getUrl());
|
|
|
-
|
|
|
- }
|
|
|
- });
|
|
|
- spider.setSpiderListeners(listListeners);
|
|
|
- spider.addUrl(urls);
|
|
|
- spider.runAsync();
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void stop(PopularFeelings popular) {
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
- static class PopularFeelingsProcessor implements PageProcessor {
|
|
|
- private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
|
|
|
-
|
|
|
- private final Site site;
|
|
|
-
|
|
|
- private final PopularFeelings popular;
|
|
|
-
|
|
|
- private final PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
-
|
|
|
- public PopularFeelingsProcessor(PopularFeelings popular, PopularFeelingsPageMapper popularFeelingsPageMapper) {
|
|
|
- this.popular = popular;
|
|
|
- this.site = Site.me().setDomain(this.popular.getDomain()).setRetryTimes(3).setSleepTime(1000);
|
|
|
- this.popularFeelingsPageMapper = popularFeelingsPageMapper;
|
|
|
- }
|
|
|
-
|
|
|
- public void process(Page page) {
|
|
|
- List<String> listUrls = page.getHtml().links().all();
|
|
|
- listUrls.removeIf(s -> {
|
|
|
- log.info(s);
|
|
|
- return LIST_SPIDER_URLS.contains(s) || StringUtils.isBlank(s);
|
|
|
- });
|
|
|
- LIST_SPIDER_URLS.addAll(listUrls);
|
|
|
- log.info(">>>>>>>>{}", LIST_SPIDER_URLS.size());
|
|
|
- page.addTargetRequests(listUrls);
|
|
|
-
|
|
|
- Document document = page.getHtml().getDocument();
|
|
|
- String uri = document.baseUri();
|
|
|
- log.info(uri);
|
|
|
- String title = document.title();
|
|
|
- String bodyText = document.body().text();
|
|
|
- String html = document.html();
|
|
|
- log.info("{}>>>>>>{}", uri, title);
|
|
|
- PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
|
|
|
- popularFeelingsPage.setId(document.id());
|
|
|
- popularFeelingsPage.setPopularFeelingsId(popular.getId());
|
|
|
- popularFeelingsPage.setUrl(uri);
|
|
|
- popularFeelingsPage.setTitle(title);
|
|
|
- popularFeelingsPage.setBodyText(bodyText);
|
|
|
- popularFeelingsPage.setHtml(html);
|
|
|
- popularFeelingsPage.setSpiderTime(DateUtil.current());
|
|
|
- this.popularFeelingsPageMapper.insert(popularFeelingsPage);
|
|
|
- }
|
|
|
-
|
|
|
- public Site getSite() {
|
|
|
- return this.site;
|
|
|
- }
|
|
|
- }
|
|
|
+ @Autowired
|
|
|
+ private PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void start(PopularFeelings popular) {
|
|
|
+ String[] urls = popular.getStartUrls().toArray(new String[]{});
|
|
|
+
|
|
|
+ Spider spider = Spider.create(new PopularFeelingsProcessor(popular, this.popularFeelingsPageMapper));
|
|
|
+ List<SpiderListener> listListeners = new ArrayList<>();
|
|
|
+ listListeners.add(new SpiderListener() {
|
|
|
+ @Override
|
|
|
+ public void onSuccess(Request request) {
|
|
|
+ log.info("onSuccess>>>>>>{}", request.getUrl());
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onError(Request request) {
|
|
|
+ log.info("onError>>>>>>{}", request.getUrl());
|
|
|
+
|
|
|
+ }
|
|
|
+ });
|
|
|
+ spider.setSpiderListeners(listListeners);
|
|
|
+ spider.addUrl(urls);
|
|
|
+ spider.runAsync();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void stop(PopularFeelings popular) {
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ static class PopularFeelingsProcessor implements PageProcessor {
|
|
|
+ private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
|
|
|
+
|
|
|
+ private final Site site;
|
|
|
+
|
|
|
+ private final PopularFeelings popular;
|
|
|
+
|
|
|
+ private final PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
+
|
|
|
+ public PopularFeelingsProcessor(PopularFeelings popular, PopularFeelingsPageMapper popularFeelingsPageMapper) {
|
|
|
+ this.popular = popular;
|
|
|
+ this.site = Site.me().setDomain(this.popular.getDomain()).setRetryTimes(3).setSleepTime(1000);
|
|
|
+ this.popularFeelingsPageMapper = popularFeelingsPageMapper;
|
|
|
+ }
|
|
|
+
|
|
|
+ public void process(Page page) {
|
|
|
+ List<String> listUrls = page.getHtml().links().all();
|
|
|
+ listUrls.removeIf(s -> {
|
|
|
+ log.info(s);
|
|
|
+ return LIST_SPIDER_URLS.contains(s) || StringUtils.isBlank(s);
|
|
|
+ });
|
|
|
+ LIST_SPIDER_URLS.addAll(listUrls);
|
|
|
+ log.info(">>>>>>>>{}", LIST_SPIDER_URLS.size());
|
|
|
+ page.addTargetRequests(listUrls);
|
|
|
+
|
|
|
+ Document document = page.getHtml().getDocument();
|
|
|
+ String uri = document.baseUri();
|
|
|
+ log.info(uri);
|
|
|
+ String title = document.title();
|
|
|
+ String bodyText = document.body().text();
|
|
|
+ String html = document.html();
|
|
|
+ log.info("{}>>>>>>{}", uri, title);
|
|
|
+ if (title.contains(popular.getKeywords()) || bodyText.contains(popular.getKeywords())) {
|
|
|
+ PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
|
|
|
+ popularFeelingsPage.setId(document.id());
|
|
|
+ popularFeelingsPage.setPopularFeelingsId(popular.getId());
|
|
|
+ popularFeelingsPage.setUrl(uri);
|
|
|
+ popularFeelingsPage.setTitle(title);
|
|
|
+ popularFeelingsPage.setBodyText(bodyText);
|
|
|
+ popularFeelingsPage.setHtml(html);
|
|
|
+ popularFeelingsPage.setSpiderTime(DateUtil.current());
|
|
|
+ this.popularFeelingsPageMapper.insert(popularFeelingsPage);
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ public Site getSite() {
|
|
|
+ return this.site;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|