|
|
@@ -1,220 +0,0 @@
|
|
|
-package com.zhiqiyun.open.core.service.impl;
|
|
|
-
|
|
|
-import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
|
|
-import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsMapper;
|
|
|
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
|
|
|
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
|
|
|
-import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
|
|
|
-import com.zhiqiyun.open.core.service.PopularFeelingsService;
|
|
|
-import com.zhiqiyun.open.utils.DateUtil;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.codec.digest.DigestUtils;
|
|
|
-import org.apache.commons.lang3.StringUtils;
|
|
|
-import org.jsoup.nodes.Document;
|
|
|
-import org.jsoup.nodes.Element;
|
|
|
-import org.jsoup.select.Elements;
|
|
|
-import org.springframework.beans.factory.annotation.Autowired;
|
|
|
-import org.springframework.stereotype.Service;
|
|
|
-import us.codecraft.webmagic.*;
|
|
|
-import us.codecraft.webmagic.pipeline.Pipeline;
|
|
|
-import us.codecraft.webmagic.processor.PageProcessor;
|
|
|
-
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.concurrent.ConcurrentHashMap;
|
|
|
-import java.util.regex.Pattern;
|
|
|
-
|
|
|
-/**
|
|
|
- * @author jtoms
|
|
|
- */
|
|
|
-@Slf4j
|
|
|
-@Service
|
|
|
-public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
|
|
|
-
|
|
|
-// @Autowired
|
|
|
-// private PopularFeelingsPageService popularFeelingsPageService;
|
|
|
-//
|
|
|
-// private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public void start(PopularFeelings popular) {
|
|
|
-// String[] urls = popular.getStartUrls().toArray(new String[]{});
|
|
|
-//
|
|
|
-// Spider spider = Spider.create(new PopularFeelingsProcessor(popular));
|
|
|
-// List<SpiderListener> listListeners = new ArrayList<>();
|
|
|
-// listListeners.add(new SpiderListener() {
|
|
|
-// @Override
|
|
|
-// public void onSuccess(Request request) {
|
|
|
-// log.info("onSuccess>>>>>>{}", request.getUrl());
|
|
|
-// }
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public void onError(Request request) {
|
|
|
-// log.info("onError>>>>>>{}", request.getUrl());
|
|
|
-// }
|
|
|
-// });
|
|
|
-// List<Pipeline> pipelines = new ArrayList<>();
|
|
|
-// pipelines.add(new Pipeline() {
|
|
|
-// @Override
|
|
|
-// public void process(ResultItems resultItems, Task task) {
|
|
|
-// Map<String, Object> dataMap = resultItems.getAll();
|
|
|
-// log.info(resultItems.getRequest().getUrl());
|
|
|
-// String url = resultItems.getRequest().getUrl();
|
|
|
-// String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
|
|
|
-//
|
|
|
-// String title = dataMap.getOrDefault("title", "").toString();
|
|
|
-// String keywords = dataMap.getOrDefault("keywords", "").toString();
|
|
|
-// String description = dataMap.getOrDefault("description", "").toString();
|
|
|
-// String bodyText = dataMap.getOrDefault("bodyText", "").toString();
|
|
|
-// String html = dataMap.getOrDefault("html", "").toString();
|
|
|
-//
|
|
|
-// if (StringUtils.isBlank(popular.getKeywords())) {
|
|
|
-// return;
|
|
|
-// }
|
|
|
-//
|
|
|
-// String[] listKeywords = popular.getKeywords().split(",");
|
|
|
-//
|
|
|
-// boolean flag = true;
|
|
|
-// for (String k : listKeywords) {
|
|
|
-// if (!bodyText.contains(k)) {
|
|
|
-// flag = false;
|
|
|
-// break;
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// if (flag) {
|
|
|
-// PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
|
|
|
-//
|
|
|
-// popularFeelingsPage.setId(id);
|
|
|
-// popularFeelingsPage.setPopularFeelingsId(popular.getId());
|
|
|
-// popularFeelingsPage.setUrl(url);
|
|
|
-// popularFeelingsPage.setTitle(title);
|
|
|
-// popularFeelingsPage.setKeywords(keywords);
|
|
|
-// popularFeelingsPage.setDescription(description);
|
|
|
-// popularFeelingsPage.setBodyText(bodyText);
|
|
|
-// popularFeelingsPage.setHtml(html);
|
|
|
-// popularFeelingsPage.setSpiderTime(DateUtil.current());
|
|
|
-// popularFeelingsPage.setUpdateTime(DateUtil.current());
|
|
|
-// popularFeelingsPageService.saveOrUpdate(popularFeelingsPage);
|
|
|
-// }
|
|
|
-// }
|
|
|
-// });
|
|
|
-// spider.setPipelines(pipelines);
|
|
|
-// spider.setExitWhenComplete(true);
|
|
|
-// spider.setSpiderListeners(listListeners);
|
|
|
-// spider.addUrl(urls);
|
|
|
-// spider.runAsync();
|
|
|
-//
|
|
|
-// SPIDER_RUNNING_MAP.put(popular.getId(), spider);
|
|
|
-// }
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public void stop(PopularFeelings popular) {
|
|
|
-// Spider spider = SPIDER_RUNNING_MAP.get(popular.getId());
|
|
|
-// if (spider != null && spider.getStatus().equals(Spider.Status.Running)) {
|
|
|
-// spider.stop();
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public int getStatus(Long popularFeelingsId) {
|
|
|
-// Spider spider = SPIDER_RUNNING_MAP.get(popularFeelingsId);
|
|
|
-// if (spider == null) {
|
|
|
-// return 0;
|
|
|
-// } else if (spider.getStatus().equals(Spider.Status.Running)) {
|
|
|
-// return 1;
|
|
|
-// } else if (spider.getStatus().equals(Spider.Status.Stopped)) {
|
|
|
-// return 2;
|
|
|
-// } else {
|
|
|
-// return 0;
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// static class PopularFeelingsProcessor implements PageProcessor {
|
|
|
-// private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
|
|
|
-//
|
|
|
-// private final Site site;
|
|
|
-//
|
|
|
-// private final PopularFeelings popular;
|
|
|
-//
|
|
|
-//
|
|
|
-// public PopularFeelingsProcessor(PopularFeelings popular) {
|
|
|
-// this.popular = popular;
|
|
|
-//
|
|
|
-// this.site = Site.me();
|
|
|
-// this.site.setDomain(this.popular.getDomain());
|
|
|
-// this.site.setUseGzip(true);
|
|
|
-// this.site.setRetryTimes(3);
|
|
|
-// this.site.setSleepTime(1000);
|
|
|
-// }
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public void process(Page page) {
|
|
|
-// List<String> listUrls = page.getHtml().links().all();
|
|
|
-// listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
|
|
|
-//
|
|
|
-// String urlPatterns = this.popular.getUrlPatterns();
|
|
|
-// if (StringUtils.isNotBlank(urlPatterns)) {
|
|
|
-// String[] listPatterns = urlPatterns.split("\n");
|
|
|
-// listUrls.removeIf(url -> {
|
|
|
-// boolean hasMatched = false;
|
|
|
-// for (String regex : listPatterns) {
|
|
|
-// hasMatched = Pattern.matches(regex, url);
|
|
|
-// if (hasMatched) {
|
|
|
-// break;
|
|
|
-// }
|
|
|
-// }
|
|
|
-// return !hasMatched;
|
|
|
-// });
|
|
|
-// }
|
|
|
-//
|
|
|
-// LIST_SPIDER_URLS.addAll(listUrls);
|
|
|
-//
|
|
|
-// page.addTargetRequests(listUrls);
|
|
|
-//
|
|
|
-// Document document = page.getHtml().getDocument();
|
|
|
-//
|
|
|
-// String title = document.title();
|
|
|
-// String bodyText = document.text();
|
|
|
-// String html = document.html();
|
|
|
-//
|
|
|
-// String keywords = "";
|
|
|
-// String description = "";
|
|
|
-// Elements elements = document.getElementsByTag("meta");
|
|
|
-//
|
|
|
-// if (elements != null && elements.size() > 0) {
|
|
|
-// for (Element element : elements) {
|
|
|
-// String metaName = element.attr("name");
|
|
|
-// String metaContent = element.attr("content");
|
|
|
-// if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
|
|
|
-// keywords = metaContent;
|
|
|
-// }
|
|
|
-// if (StringUtils.equalsIgnoreCase("description", metaName)) {
|
|
|
-// description = metaContent;
|
|
|
-// }
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
|
|
|
-// description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
|
|
|
-// }
|
|
|
-//
|
|
|
-// if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
|
|
|
-// keywords = description;
|
|
|
-// }
|
|
|
-//
|
|
|
-//
|
|
|
-// page.putField("title", title);
|
|
|
-// page.putField("bodyText", bodyText);
|
|
|
-// page.putField("html", html);
|
|
|
-// page.putField("keywords", keywords);
|
|
|
-// page.putField("description", description);
|
|
|
-// }
|
|
|
-//
|
|
|
-// @Override
|
|
|
-// public Site getSite() {
|
|
|
-// return this.site;
|
|
|
-// }
|
|
|
-// }
|
|
|
-}
|