|
|
@@ -2,13 +2,100 @@ package com.zhiqiyun.open.core.service.impl;
|
|
|
|
|
|
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsMapper;
import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsPageMapper;
import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
import com.zhiqiyun.open.core.service.PopularFeelingsService;
import com.zhiqiyun.open.utils.DateUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import us.codecraft.webmagic.*;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
|
|
|
|
|
|
/**
|
|
|
* @author jtoms
|
|
|
*/
|
|
|
+@Slf4j
|
|
|
@Service
|
|
|
public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void start(PopularFeelings popular) {
|
|
|
+ String[] urls = popular.getStartUrls().toArray(new String[]{});
|
|
|
+
|
|
|
+ Spider spider = Spider.create(new PopularFeelingsProcessor(popular, this.popularFeelingsPageMapper));
|
|
|
+ List<SpiderListener> listListeners = new ArrayList<>();
|
|
|
+ listListeners.add(new SpiderListener() {
|
|
|
+ @Override
|
|
|
+ public void onSuccess(Request request) {
|
|
|
+ log.info("onSuccess>>>>>>{}", request.getUrl());
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onError(Request request) {
|
|
|
+ log.info("onError>>>>>>{}", request.getUrl());
|
|
|
+
|
|
|
+ }
|
|
|
+ });
|
|
|
+ spider.setSpiderListeners(listListeners);
|
|
|
+ spider.addUrl(urls);
|
|
|
+ spider.runAsync();
|
|
|
+ }
|
|
|
+
|
|
|
+ static class PopularFeelingsProcessor implements PageProcessor {
|
|
|
+ private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
|
|
|
+
|
|
|
+ private final Site site;
|
|
|
+
|
|
|
+ private final PopularFeelings popular;
|
|
|
+
|
|
|
+ private final PopularFeelingsPageMapper popularFeelingsPageMapper;
|
|
|
+
|
|
|
+ public PopularFeelingsProcessor(PopularFeelings popular, PopularFeelingsPageMapper popularFeelingsPageMapper) {
|
|
|
+ this.popular = popular;
|
|
|
+ this.site = Site.me().setDomain(this.popular.getDomain()).setRetryTimes(3).setSleepTime(1000);
|
|
|
+ this.popularFeelingsPageMapper = popularFeelingsPageMapper;
|
|
|
+ }
|
|
|
+
|
|
|
+ public void process(Page page) {
|
|
|
+ List<String> listUrls = page.getHtml().links().all();
|
|
|
+ listUrls.removeIf(s -> {
|
|
|
+ log.info(s);
|
|
|
+ return LIST_SPIDER_URLS.contains(s) || StringUtils.isBlank(s);
|
|
|
+ });
|
|
|
+ LIST_SPIDER_URLS.addAll(listUrls);
|
|
|
+ log.info(">>>>>>>>{}", LIST_SPIDER_URLS.size());
|
|
|
+ page.addTargetRequests(listUrls);
|
|
|
+
|
|
|
+ Document document = page.getHtml().getDocument();
|
|
|
+ String uri = document.baseUri();
|
|
|
+ log.info(uri);
|
|
|
+ String title = document.title();
|
|
|
+ String bodyText = document.body().text();
|
|
|
+ String html = document.html();
|
|
|
+ log.info("{}>>>>>>{}", uri, title);
|
|
|
+ PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
|
|
|
+ popularFeelingsPage.setId(document.id());
|
|
|
+ popularFeelingsPage.setPopularFeelingsId(popular.getId());
|
|
|
+ popularFeelingsPage.setUrl(uri);
|
|
|
+ popularFeelingsPage.setTitle(title);
|
|
|
+ popularFeelingsPage.setBodyText(bodyText);
|
|
|
+ popularFeelingsPage.setHtml(html);
|
|
|
+ popularFeelingsPage.setSpiderTime(DateUtil.current());
|
|
|
+ this.popularFeelingsPageMapper.insert(popularFeelingsPage);
|
|
|
+ }
|
|
|
+
|
|
|
+ public Site getSite() {
|
|
|
+ return this.site;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|