stjdydayou 4 年 前
コミット
59e7b15ebb

+ 0 - 34
src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelingsSiteRule.java

@@ -1,34 +0,0 @@
-package com.zhiqiyun.open.core.models.statistics;
-
-import com.baomidou.mybatisplus.annotation.FieldStrategy;
-import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableName;
-import com.zhiqiyun.open.core.typeHandler.FastjsonTypeHandler;
-import lombok.Data;
-
-import java.util.Date;
-import java.util.List;
-
-/**
- * @author jtoms
- */
-@Data
-@TableName(value = "popular_feelings", autoResultMap = true)
-public class PopularFeelingsSiteRule {
-    private Long id;
-    private String siteName;
-    private String urlPatterns;
-
-    @TableField(typeHandler = FastjsonTypeHandler.class)
-    private List<String> startUrls;
-
-    private String xpath;
-
-    @TableField(updateStrategy = FieldStrategy.NEVER)
-    private Date createdTime;
-    @TableField(updateStrategy = FieldStrategy.NEVER)
-    private Long createdBy;
-
-    private Date updatedTime;
-    private Long updatedBy;
-}

+ 3 - 3
src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsService.java

@@ -12,20 +12,20 @@ public interface PopularFeelingsService extends IService<PopularFeelings> {
      *
      * @param popular
      */
-    void start(PopularFeelings popular);
+//    void start(PopularFeelings popular);
 
     /**
      * 停止采集
      *
      * @param popular
      */
-    void stop(PopularFeelings popular);
+//    void stop(PopularFeelings popular);
 
     /**
      * 查询运行状态
      *
      * @param popularFeelingsId
      */
-    int getStatus(Long popularFeelingsId);
+//    int getStatus(Long popularFeelingsId);
 
 }

+ 6 - 7
src/main/java/com/zhiqiyun/open/core/service/SentimentSpiderEventService.java

@@ -2,28 +2,27 @@ package com.zhiqiyun.open.core.service;
 
 import com.baomidou.mybatisplus.extension.service.IService;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderEvent;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
 
 public interface SentimentSpiderEventService extends IService<SentimentSpiderEvent> {
 	/**
 	 * Starts crawling for an event.
 	 *
-	 * @param popular
+	 * @param event the spider event to start crawling for
 	 */
-	void start(PopularFeelings popular);
+	void start(SentimentSpiderEvent event);
 
 	/**
 	 * Stops crawling for an event.
 	 *
-	 * @param popular
+	 * @param event the spider event whose crawl should be stopped
 	 */
-	void stop(PopularFeelings popular);
+	void stop(SentimentSpiderEvent event);
 
 	/**
 	 * Queries the running status of an event's crawl.
 	 *
-	 * @param popularFeelingsId
+	 * @param eventId id of the spider event to look up
+	 * @return a status code; SentimentSpiderEventServiceImpl returns 1 for a running spider,
+	 *         2 for a stopped one and 0 otherwise
 	 */
-	int getStatus(Long popularFeelingsId);
+	int getStatus(Long eventId);
 
 }

+ 185 - 185
src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsServiceImpl.java

@@ -32,189 +32,189 @@ import java.util.regex.Pattern;
 @Service
 public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
 
-    @Autowired
-    private PopularFeelingsPageService popularFeelingsPageService;
-
-    private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
-
-    @Override
-    public void start(PopularFeelings popular) {
-        String[] urls = popular.getStartUrls().toArray(new String[]{});
-
-        Spider spider = Spider.create(new PopularFeelingsProcessor(popular));
-        List<SpiderListener> listListeners = new ArrayList<>();
-        listListeners.add(new SpiderListener() {
-            @Override
-            public void onSuccess(Request request) {
-                log.info("onSuccess>>>>>>{}", request.getUrl());
-            }
-
-            @Override
-            public void onError(Request request) {
-                log.info("onError>>>>>>{}", request.getUrl());
-            }
-        });
-        List<Pipeline> pipelines = new ArrayList<>();
-        pipelines.add(new Pipeline() {
-            @Override
-            public void process(ResultItems resultItems, Task task) {
-                Map<String, Object> dataMap = resultItems.getAll();
-                log.info(resultItems.getRequest().getUrl());
-                String url = resultItems.getRequest().getUrl();
-                String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
-
-                String title = dataMap.getOrDefault("title", "").toString();
-                String keywords = dataMap.getOrDefault("keywords", "").toString();
-                String description = dataMap.getOrDefault("description", "").toString();
-                String bodyText = dataMap.getOrDefault("bodyText", "").toString();
-                String html = dataMap.getOrDefault("html", "").toString();
-
-                if (StringUtils.isBlank(popular.getKeywords())) {
-                    return;
-                }
-
-                String[] listKeywords = popular.getKeywords().split(",");
-
-                boolean flag = true;
-                for (String k : listKeywords) {
-                    if (!bodyText.contains(k)) {
-                        flag = false;
-                        break;
-                    }
-                }
-
-                if (flag) {
-                    PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
-
-                    popularFeelingsPage.setId(id);
-                    popularFeelingsPage.setPopularFeelingsId(popular.getId());
-                    popularFeelingsPage.setUrl(url);
-                    popularFeelingsPage.setTitle(title);
-                    popularFeelingsPage.setKeywords(keywords);
-                    popularFeelingsPage.setDescription(description);
-                    popularFeelingsPage.setBodyText(bodyText);
-                    popularFeelingsPage.setHtml(html);
-                    popularFeelingsPage.setSpiderTime(DateUtil.current());
-                    popularFeelingsPage.setUpdateTime(DateUtil.current());
-                    popularFeelingsPageService.saveOrUpdate(popularFeelingsPage);
-                }
-            }
-        });
-        spider.setPipelines(pipelines);
-        spider.setExitWhenComplete(true);
-        spider.setSpiderListeners(listListeners);
-        spider.addUrl(urls);
-        spider.runAsync();
-
-        SPIDER_RUNNING_MAP.put(popular.getId(), spider);
-    }
-
-    @Override
-    public void stop(PopularFeelings popular) {
-        Spider spider = SPIDER_RUNNING_MAP.get(popular.getId());
-        if (spider != null && spider.getStatus().equals(Spider.Status.Running)) {
-            spider.stop();
-        }
-    }
-
-    @Override
-    public int getStatus(Long popularFeelingsId) {
-        Spider spider = SPIDER_RUNNING_MAP.get(popularFeelingsId);
-        if (spider == null) {
-            return 0;
-        } else if (spider.getStatus().equals(Spider.Status.Running)) {
-            return 1;
-        } else if (spider.getStatus().equals(Spider.Status.Stopped)) {
-            return 2;
-        } else {
-            return 0;
-        }
-    }
-
-    static class PopularFeelingsProcessor implements PageProcessor {
-        private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
-
-        private final Site site;
-
-        private final PopularFeelings popular;
-
-
-        public PopularFeelingsProcessor(PopularFeelings popular) {
-            this.popular = popular;
-
-            this.site = Site.me();
-            this.site.setDomain(this.popular.getDomain());
-            this.site.setUseGzip(true);
-            this.site.setRetryTimes(3);
-            this.site.setSleepTime(1000);
-        }
-
-        @Override
-        public void process(Page page) {
-            List<String> listUrls = page.getHtml().links().all();
-            listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
-
-            String urlPatterns = this.popular.getUrlPatterns();
-            if (StringUtils.isNotBlank(urlPatterns)) {
-                String[] listPatterns = urlPatterns.split("\n");
-                listUrls.removeIf(url -> {
-                    boolean hasMatched = false;
-                    for (String regex : listPatterns) {
-                        hasMatched = Pattern.matches(regex, url);
-                        if (hasMatched) {
-                            break;
-                        }
-                    }
-                    return !hasMatched;
-                });
-            }
-
-            LIST_SPIDER_URLS.addAll(listUrls);
-
-            page.addTargetRequests(listUrls);
-
-            Document document = page.getHtml().getDocument();
-
-            String title = document.title();
-            String bodyText = document.text();
-            String html = document.html();
-
-            String keywords = "";
-            String description = "";
-            Elements elements = document.getElementsByTag("meta");
-
-            if (elements != null && elements.size() > 0) {
-                for (Element element : elements) {
-                    String metaName = element.attr("name");
-                    String metaContent = element.attr("content");
-                    if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
-                        keywords = metaContent;
-                    }
-                    if (StringUtils.equalsIgnoreCase("description", metaName)) {
-                        description = metaContent;
-                    }
-                }
-            }
-
-            if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
-                description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
-            }
-
-            if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
-                keywords = description;
-            }
-
-
-            page.putField("title", title);
-            page.putField("bodyText", bodyText);
-            page.putField("html", html);
-            page.putField("keywords", keywords);
-            page.putField("description", description);
-        }
-
-        @Override
-        public Site getSite() {
-            return this.site;
-        }
-    }
+//    @Autowired
+//    private PopularFeelingsPageService popularFeelingsPageService;
+//
+//    private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
+//
+//    @Override
+//    public void start(PopularFeelings popular) {
+//        String[] urls = popular.getStartUrls().toArray(new String[]{});
+//
+//        Spider spider = Spider.create(new PopularFeelingsProcessor(popular));
+//        List<SpiderListener> listListeners = new ArrayList<>();
+//        listListeners.add(new SpiderListener() {
+//            @Override
+//            public void onSuccess(Request request) {
+//                log.info("onSuccess>>>>>>{}", request.getUrl());
+//            }
+//
+//            @Override
+//            public void onError(Request request) {
+//                log.info("onError>>>>>>{}", request.getUrl());
+//            }
+//        });
+//        List<Pipeline> pipelines = new ArrayList<>();
+//        pipelines.add(new Pipeline() {
+//            @Override
+//            public void process(ResultItems resultItems, Task task) {
+//                Map<String, Object> dataMap = resultItems.getAll();
+//                log.info(resultItems.getRequest().getUrl());
+//                String url = resultItems.getRequest().getUrl();
+//                String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
+//
+//                String title = dataMap.getOrDefault("title", "").toString();
+//                String keywords = dataMap.getOrDefault("keywords", "").toString();
+//                String description = dataMap.getOrDefault("description", "").toString();
+//                String bodyText = dataMap.getOrDefault("bodyText", "").toString();
+//                String html = dataMap.getOrDefault("html", "").toString();
+//
+//                if (StringUtils.isBlank(popular.getKeywords())) {
+//                    return;
+//                }
+//
+//                String[] listKeywords = popular.getKeywords().split(",");
+//
+//                boolean flag = true;
+//                for (String k : listKeywords) {
+//                    if (!bodyText.contains(k)) {
+//                        flag = false;
+//                        break;
+//                    }
+//                }
+//
+//                if (flag) {
+//                    PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
+//
+//                    popularFeelingsPage.setId(id);
+//                    popularFeelingsPage.setPopularFeelingsId(popular.getId());
+//                    popularFeelingsPage.setUrl(url);
+//                    popularFeelingsPage.setTitle(title);
+//                    popularFeelingsPage.setKeywords(keywords);
+//                    popularFeelingsPage.setDescription(description);
+//                    popularFeelingsPage.setBodyText(bodyText);
+//                    popularFeelingsPage.setHtml(html);
+//                    popularFeelingsPage.setSpiderTime(DateUtil.current());
+//                    popularFeelingsPage.setUpdateTime(DateUtil.current());
+//                    popularFeelingsPageService.saveOrUpdate(popularFeelingsPage);
+//                }
+//            }
+//        });
+//        spider.setPipelines(pipelines);
+//        spider.setExitWhenComplete(true);
+//        spider.setSpiderListeners(listListeners);
+//        spider.addUrl(urls);
+//        spider.runAsync();
+//
+//        SPIDER_RUNNING_MAP.put(popular.getId(), spider);
+//    }
+//
+//    @Override
+//    public void stop(PopularFeelings popular) {
+//        Spider spider = SPIDER_RUNNING_MAP.get(popular.getId());
+//        if (spider != null && spider.getStatus().equals(Spider.Status.Running)) {
+//            spider.stop();
+//        }
+//    }
+//
+//    @Override
+//    public int getStatus(Long popularFeelingsId) {
+//        Spider spider = SPIDER_RUNNING_MAP.get(popularFeelingsId);
+//        if (spider == null) {
+//            return 0;
+//        } else if (spider.getStatus().equals(Spider.Status.Running)) {
+//            return 1;
+//        } else if (spider.getStatus().equals(Spider.Status.Stopped)) {
+//            return 2;
+//        } else {
+//            return 0;
+//        }
+//    }
+//
+//    static class PopularFeelingsProcessor implements PageProcessor {
+//        private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
+//
+//        private final Site site;
+//
+//        private final PopularFeelings popular;
+//
+//
+//        public PopularFeelingsProcessor(PopularFeelings popular) {
+//            this.popular = popular;
+//
+//            this.site = Site.me();
+//            this.site.setDomain(this.popular.getDomain());
+//            this.site.setUseGzip(true);
+//            this.site.setRetryTimes(3);
+//            this.site.setSleepTime(1000);
+//        }
+//
+//        @Override
+//        public void process(Page page) {
+//            List<String> listUrls = page.getHtml().links().all();
+//            listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
+//
+//            String urlPatterns = this.popular.getUrlPatterns();
+//            if (StringUtils.isNotBlank(urlPatterns)) {
+//                String[] listPatterns = urlPatterns.split("\n");
+//                listUrls.removeIf(url -> {
+//                    boolean hasMatched = false;
+//                    for (String regex : listPatterns) {
+//                        hasMatched = Pattern.matches(regex, url);
+//                        if (hasMatched) {
+//                            break;
+//                        }
+//                    }
+//                    return !hasMatched;
+//                });
+//            }
+//
+//            LIST_SPIDER_URLS.addAll(listUrls);
+//
+//            page.addTargetRequests(listUrls);
+//
+//            Document document = page.getHtml().getDocument();
+//
+//            String title = document.title();
+//            String bodyText = document.text();
+//            String html = document.html();
+//
+//            String keywords = "";
+//            String description = "";
+//            Elements elements = document.getElementsByTag("meta");
+//
+//            if (elements != null && elements.size() > 0) {
+//                for (Element element : elements) {
+//                    String metaName = element.attr("name");
+//                    String metaContent = element.attr("content");
+//                    if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
+//                        keywords = metaContent;
+//                    }
+//                    if (StringUtils.equalsIgnoreCase("description", metaName)) {
+//                        description = metaContent;
+//                    }
+//                }
+//            }
+//
+//            if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
+//                description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
+//            }
+//
+//            if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
+//                keywords = description;
+//            }
+//
+//
+//            page.putField("title", title);
+//            page.putField("bodyText", bodyText);
+//            page.putField("html", html);
+//            page.putField("keywords", keywords);
+//            page.putField("description", description);
+//        }
+//
+//        @Override
+//        public Site getSite() {
+//            return this.site;
+//        }
+//    }
 }

+ 209 - 6
src/main/java/com/zhiqiyun/open/core/service/impl/SentimentSpiderEventServiceImpl.java

@@ -1,26 +1,229 @@
 package com.zhiqiyun.open.core.service.impl;
 
+import com.alibaba.fastjson.JSON;
 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
 import com.zhiqiyun.open.core.mapper.sentiment.SentimentSpiderEventMapper;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderEvent;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
+import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderSiteRule;
 import com.zhiqiyun.open.core.service.SentimentSpiderEventService;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 import org.springframework.stereotype.Service;
+import us.codecraft.webmagic.*;
+import us.codecraft.webmagic.pipeline.Pipeline;
+import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.selector.Selectable;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.regex.Pattern;
+
+@Slf4j
 @Service
 public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpiderEventMapper, SentimentSpiderEvent> implements SentimentSpiderEventService {
+
+	private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
+
 	@Override
-	public void start(PopularFeelings popular) {
+	public void start(SentimentSpiderEvent event) {
+
+		SentimentSpiderSiteRule rule = event.getRule();
+		if (rule == null || StringUtils.isBlank(event.getKeywords()) || StringUtils.isBlank(rule.getStartUrls())) {
+			return;
+		}
+		String[] urlArray = rule.getStartUrls().split("\n");
+
+		Spider spider = Spider.create(new SpiderEventProcessor(rule));
+
+		List<Pipeline> pipelines = new ArrayList<>();
 
+		String[] keywords = event.getKeywords().split(",");
+		pipelines.add(new SpiderEventPipeline(keywords));
+		spider.setPipelines(pipelines);
+		spider.setExitWhenComplete(true);
+		spider.addUrl(urlArray);
+		spider.runAsync();
+
+		SPIDER_RUNNING_MAP.put(event.getId(), spider);
 	}
 
 	@Override
-	public void stop(PopularFeelings popular) {
-
+	public void stop(SentimentSpiderEvent event) {
+		Spider spider = SPIDER_RUNNING_MAP.get(event.getId());
+		if (spider != null && spider.getStatus().equals(Spider.Status.Running)) {
+			spider.stop();
+		}
 	}
 
 	@Override
-	public int getStatus(Long popularFeelingsId) {
-		return 0;
+	public int getStatus(Long eventId) {
+		Spider spider = SPIDER_RUNNING_MAP.get(eventId);
+		if (spider == null) {
+			return 0;
+		} else if (spider.getStatus().equals(Spider.Status.Running)) {
+			return 1;
+		} else if (spider.getStatus().equals(Spider.Status.Stopped)) {
+			return 2;
+		} else {
+			return 0;
+		}
+	}
+
+	/**
+	 * Pipeline that checks a crawled page against the event keywords.
+	 * <p>
+	 * It currently only logs the outcome: whether the body text contains every keyword, how many
+	 * text fragments contain at least one keyword, and the body text itself. Nothing is persisted.
+	 */
+	public static class SpiderEventPipeline implements Pipeline {
+
+		/** Trimmed, non-blank keywords. */
+		private final String[] keywords;
+
+		/**
+		 * @param keywords raw keyword tokens; blank tokens are dropped and the rest are trimmed,
+		 *                 because an empty keyword is contained in every string and would make
+		 *                 every fragment count as a match
+		 */
+		public SpiderEventPipeline(String[] keywords) {
+			List<String> cleaned = new ArrayList<>();
+			if (keywords != null) {
+				for (String keyword : keywords) {
+					if (StringUtils.isNotBlank(keyword)) {
+						cleaned.add(keyword.trim());
+					}
+				}
+			}
+			this.keywords = cleaned.toArray(new String[0]);
+		}
+
+		@Override
+		public void process(ResultItems resultItems, Task task) {
+			Map<String, Object> dataMap = resultItems.getAll();
+
+			String bodyText = dataMap.getOrDefault("bodyText", "").toString();
+			List<String> parsedFragments = JSON.parseArray(dataMap.getOrDefault("listFragments", "[]").toString(), String.class);
+			List<String> listFragments = parsedFragments != null ? parsedFragments : new ArrayList<>();
+
+			boolean flag = containsAllKeywords(bodyText);
+
+			// Keep only the fragments that mention at least one keyword.
+			listFragments.removeIf(fragment -> fragment == null || !containsAnyKeyword(fragment));
+
+			log.info("{}>>>>>>>>{}>>>>>>>>{}", flag, listFragments.size(), bodyText);
+		}
+
+		/** Returns true when the text contains every keyword (trivially true without keywords). */
+		private boolean containsAllKeywords(String text) {
+			for (String keyword : this.keywords) {
+				if (!text.contains(keyword)) {
+					return false;
+				}
+			}
+			return true;
+		}
+
+		/** Returns true when the text contains at least one keyword. */
+		private boolean containsAnyKeyword(String text) {
+			for (String keyword : this.keywords) {
+				if (text.contains(keyword)) {
+					return true;
+				}
+			}
+			return false;
+		}
+	}
+
+	/**
+	 * Page processor driven by a site rule. For every page it queues the links allowed by the
+	 * rule's URL patterns and publishes the title, the full HTML, the meta keywords/description,
+	 * and the text of the node selected by the rule's XPath (as a whole and per child element).
+	 * Pages without any selected text are marked as skipped.
+	 */
+	public static class SpiderEventProcessor implements PageProcessor {
+		/** URLs this processor has already queued; hash-based so the lookup does not scan a list. */
+		private final ConcurrentHashMap.KeySetView<String, Boolean> visitedUrls = ConcurrentHashMap.newKeySet();
+
+		private final Site site;
+
+		private final SentimentSpiderSiteRule rule;
+
+		/** Whether the rule defines URL patterns, i.e. whether links must match one of them. */
+		private final boolean restrictLinks;
+
+		/** Patterns compiled once from the non-blank lines of the rule's pattern text. */
+		private final List<Pattern> linkPatterns = new ArrayList<>();
+
+		/**
+		 * @param rule site rule providing the URL patterns (one regex per line) and the XPath
+		 */
+		public SpiderEventProcessor(SentimentSpiderSiteRule rule) {
+			this.rule = rule;
+			this.site = Site.me();
+			this.site.setUseGzip(true);
+			this.site.setRetryTimes(3);
+			this.site.setSleepTime(1000);
+
+			String urlPatterns = rule == null ? null : rule.getUrlPatterns();
+			this.restrictLinks = StringUtils.isNotBlank(urlPatterns);
+			if (this.restrictLinks) {
+				// Tolerate Windows line endings, surrounding whitespace and empty lines.
+				for (String line : urlPatterns.split("\\r?\\n")) {
+					String regex = line.trim();
+					if (regex.isEmpty()) {
+						continue;
+					}
+					try {
+						this.linkPatterns.add(Pattern.compile(regex));
+					} catch (java.util.regex.PatternSyntaxException e) {
+						// An unusable pattern simply never matches instead of failing every page.
+						log.warn("Ignoring invalid url pattern: {}", regex, e);
+					}
+				}
+			}
+		}
+
+		@Override
+		public void process(Page page) {
+			List<String> listUrls = page.getHtml().links().all();
+			// Order matters: a URL is recorded as visited only when it is non-blank and allowed.
+			listUrls.removeIf(url -> StringUtils.isBlank(url) || !isAllowedLink(url) || !this.visitedUrls.add(url));
+			page.addTargetRequests(listUrls);
+
+			Document document = page.getHtml().getDocument();
+
+			// Read these before the document content is replaced by the XPath selection below.
+			String title = document.title();
+			String html = document.html();
+
+			String keywords = "";
+			String description = "";
+			for (Element meta : document.getElementsByTag("meta")) {
+				String metaName = meta.attr("name");
+				String metaContent = meta.attr("content");
+				if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
+					keywords = metaContent;
+				}
+				if (StringUtils.equalsIgnoreCase("description", metaName)) {
+					description = metaContent;
+				}
+			}
+
+			String bodyText = "";
+			List<String> listFragments = new ArrayList<>();
+			String xpath = this.rule.getXpath();
+			if (StringUtils.isNotBlank(xpath)) {
+				Selectable selectable = page.getHtml().xpath(xpath);
+				if (selectable.match()) {
+					document.html(selectable.get());
+					bodyText = document.text();
+
+					// first() is null when the selection produced no element children.
+					Element root = document.children().first();
+					if (root != null) {
+						for (Element element : root.children()) {
+							String text = element.text();
+							if (StringUtils.isNotBlank(text)) {
+								listFragments.add(text.trim());
+							}
+						}
+					}
+				}
+			}
+
+			if (StringUtils.isBlank(bodyText)) {
+				page.setSkip(true);
+			}
+
+			// Fall back to the first 200 characters of the text, then to the description.
+			if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
+				description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
+			}
+			if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
+				keywords = description;
+			}
+
+			page.putField("title", title);
+			page.putField("bodyText", bodyText);
+			page.putField("html", html);
+			page.putField("keywords", keywords);
+			page.putField("description", description);
+			page.putField("listFragments", JSON.toJSONString(listFragments));
+		}
+
+		/** Returns true when no patterns are configured or the whole URL matches one of them. */
+		private boolean isAllowedLink(String url) {
+			if (!this.restrictLinks) {
+				return true;
+			}
+			for (Pattern pattern : this.linkPatterns) {
+				if (pattern.matcher(url).matches()) {
+					return true;
+				}
+			}
+			return false;
+		}
+
+		@Override
+		public Site getSite() {
+			return this.site;
+		}
 	}
 }

+ 94 - 94
src/main/java/com/zhiqiyun/open/mvc/controller/PopularFeelingsController.java

@@ -35,132 +35,132 @@ import java.util.List;
 @RequestMapping("/popular/feelings")
 public class PopularFeelingsController {
 
-    @Autowired
-    private PopularFeelingsService popularFeelingsService;
+	@Autowired
+	private PopularFeelingsService popularFeelingsService;
 
-    @Autowired
-    private PopularFeelingsPageService popularFeelingsPageService;
+	@Autowired
+	private PopularFeelingsPageService popularFeelingsPageService;
 
-    @Autowired
-    private OauthService oauthService;
+	@Autowired
+	private OauthService oauthService;
 
-    @Autowired
-    private SequenceService sequenceService;
+	@Autowired
+	private SequenceService sequenceService;
 
 
-    @Permission(value = "popular.feelings.find", tags = "查询舆情监控")
-    @PostMapping("/findPage")
-    public Result findPage(@RequestBody QueryPopularFeelingsParam param) {
+	@Permission(value = "popular.feelings.find", tags = "查询舆情监控")
+	@PostMapping("/findPage")
+	public Result findPage(@RequestBody QueryPopularFeelingsParam param) {
 
-        QueryWrapper<PopularFeelings> wrapper = new QueryWrapper<>();
+		QueryWrapper<PopularFeelings> wrapper = new QueryWrapper<>();
 
-        if (StringUtils.isNotBlank(param.getTitle())) {
-            wrapper.like("title", param.getTitle());
-        }
+		if (StringUtils.isNotBlank(param.getTitle())) {
+			wrapper.like("title", param.getTitle());
+		}
 
-        if (StringUtils.isNotBlank(param.getKeywords())) {
-            wrapper.like("keywords", param.getKeywords());
-        }
+		if (StringUtils.isNotBlank(param.getKeywords())) {
+			wrapper.like("keywords", param.getKeywords());
+		}
 
-        if (StringUtils.isNotBlank(param.getSiteUrl())) {
-            wrapper.like("site_url", param.getSiteUrl());
-        }
+		if (StringUtils.isNotBlank(param.getSiteUrl())) {
+			wrapper.like("site_url", param.getSiteUrl());
+		}
 
-        Page<PopularFeelings> page = param.getPage();
-        page.addOrder(OrderItem.desc("id"));
+		Page<PopularFeelings> page = param.getPage();
+		page.addOrder(OrderItem.desc("id"));
 
-        Page<PopularFeelings> resultData = this.popularFeelingsService.page(page, wrapper);
-        for (PopularFeelings popular : resultData.getRecords()) {
-            popular.setStatus(this.popularFeelingsService.getStatus(popular.getId()));
+		Page<PopularFeelings> resultData = this.popularFeelingsService.page(page, wrapper);
+		for (PopularFeelings popular : resultData.getRecords()) {
+//            popular.setStatus(this.popularFeelingsService.getStatus(popular.getId()));
 
-            QueryWrapper<PopularFeelingsPage> wrapperPage = new QueryWrapper<>();
-            wrapperPage.eq("popular_feelings_id", popular.getId());
+			QueryWrapper<PopularFeelingsPage> wrapperPage = new QueryWrapper<>();
+			wrapperPage.eq("popular_feelings_id", popular.getId());
 
-            long spiderCount = this.popularFeelingsPageService.count(wrapperPage);
-            popular.setSpiderCount(spiderCount);
-        }
+			long spiderCount = this.popularFeelingsPageService.count(wrapperPage);
+			popular.setSpiderCount(spiderCount);
+		}
 
-        return Result.instance(Result.Code.SUCCESS).setData(resultData);
-    }
+		return Result.instance(Result.Code.SUCCESS).setData(resultData);
+	}
 
-    @Permission(value = "popular.feelings.add", tags = "新建舆情监控")
-    @PostMapping("/save")
-    public Result save(@Valid @RequestBody SavePopularFeelingsParam param) throws Exception {
+	@Permission(value = "popular.feelings.add", tags = "新建舆情监控")
+	@PostMapping("/save")
+	public Result save(@Valid @RequestBody SavePopularFeelingsParam param) throws Exception {
 
-        PopularFeelings entity = new PopularFeelings();
-        BeanUtils.copyProperties(param, entity);
+		PopularFeelings entity = new PopularFeelings();
+		BeanUtils.copyProperties(param, entity);
 
-        OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
+		OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
 
-        entity.setCreatedTime(DateUtil.current());
-        entity.setCreatedBy(oauthInfo.getId());
+		entity.setCreatedTime(DateUtil.current());
+		entity.setCreatedBy(oauthInfo.getId());
 
-        entity.setUpdatedTime(DateUtil.current());
-        entity.setUpdatedBy(oauthInfo.getId());
-        entity.setId(this.sequenceService.nextId());
+		entity.setUpdatedTime(DateUtil.current());
+		entity.setUpdatedBy(oauthInfo.getId());
+		entity.setId(this.sequenceService.nextId());
 
-        this.popularFeelingsService.save(entity);
+		this.popularFeelingsService.save(entity);
 
-        return Result.instance(Result.Code.MESSAGE_SUCCESS);
-    }
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 
-    @Permission(value = "popular.feelings.edit", tags = "更新舆情监控")
-    @PostMapping("/updateById")
-    public Result updateById(Long id, @Valid @RequestBody SavePopularFeelingsParam param) {
-        PopularFeelings entity = new PopularFeelings();
-        BeanUtils.copyProperties(param, entity);
+	@Permission(value = "popular.feelings.edit", tags = "更新舆情监控")
+	@PostMapping("/updateById")
+	public Result updateById(Long id, @Valid @RequestBody SavePopularFeelingsParam param) {
+		PopularFeelings entity = new PopularFeelings();
+		BeanUtils.copyProperties(param, entity);
 
-        OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
+		OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
 
-        entity.setUpdatedBy(oauthInfo.getId());
-        entity.setUpdatedTime(DateUtil.current());
-        entity.setId(id);
+		entity.setUpdatedBy(oauthInfo.getId());
+		entity.setUpdatedTime(DateUtil.current());
+		entity.setId(id);
 
-        this.popularFeelingsService.updateById(entity);
+		this.popularFeelingsService.updateById(entity);
 
-        return Result.instance(Result.Code.MESSAGE_SUCCESS);
-    }
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 
-    @Permission(value = "popular.feelings.delete", tags = "删除舆情监控")
-    @PostMapping("/deleteByIds")
-    public Result deleteByIds(@RequestBody List<Long> ids) {
-        this.popularFeelingsService.removeByIds(ids);
-        return Result.instance(Result.Code.MESSAGE_SUCCESS);
-    }
+	@Permission(value = "popular.feelings.delete", tags = "删除舆情监控")
+	@PostMapping("/deleteByIds")
+	public Result deleteByIds(@RequestBody List<Long> ids) {
+		this.popularFeelingsService.removeByIds(ids);
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 
-    @Permission(value = "popular.feelings.start", tags = "启动舆情监控")
-    @PostMapping("/startSpider")
-    public Result startSpider(@RequestBody List<Long> ids) {
-        List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
-        for (PopularFeelings popularFeelings : listData) {
-            this.popularFeelingsService.start(popularFeelings);
-        }
-        return Result.instance(Result.Code.MESSAGE_SUCCESS);
-    }
+	@Permission(value = "popular.feelings.start", tags = "启动舆情监控")
+	@PostMapping("/startSpider")
+	public Result startSpider(@RequestBody List<Long> ids) { // Loads the monitors for the given ids and returns MESSAGE_SUCCESS.
+		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
+		for (PopularFeelings popularFeelings : listData) { // NOTE(review): the start call below is commented out, so no spider is started here.
+//            this.popularFeelingsService.start(popularFeelings);
+		}
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 
-    @Permission(value = "popular.feelings.stop", tags = "停止舆情监控")
-    @PostMapping("/stopSpider")
-    public Result stopSpider(@RequestBody List<Long> ids) {
-        List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
-        for (PopularFeelings popularFeelings : listData) {
-            this.popularFeelingsService.stop(popularFeelings);
-        }
-        return Result.instance(Result.Code.MESSAGE_SUCCESS);
-    }
+	@Permission(value = "popular.feelings.stop", tags = "停止舆情监控")
+	@PostMapping("/stopSpider")
+	public Result stopSpider(@RequestBody List<Long> ids) { // Loads the monitors for the given ids and returns MESSAGE_SUCCESS.
+		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
+		for (PopularFeelings popularFeelings : listData) { // NOTE(review): the stop call below is commented out, so no spider is stopped here.
+//            this.popularFeelingsService.stop(popularFeelings);
+		}
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 
-    @Permission(value = "popular.feelings.find", tags = "查询舆情监控详情")
-    @PostMapping("/findDetailPage")
-    public Result findDetailPage(@RequestBody QueryPopularFeelingsPageParam param) {
+	@Permission(value = "popular.feelings.find", tags = "查询舆情监控详情")
+	@PostMapping("/findDetailPage")
+	public Result findDetailPage(@RequestBody QueryPopularFeelingsPageParam param) {
 
-        QueryWrapper<PopularFeelingsPage> wrapper = new QueryWrapper<>();
-        wrapper.select("id", "popular_feelings_id", "url", "title", "keywords", "description", "spider_time", "update_time");
-        wrapper.eq("popular_feelings_id", param.getPopularFeelingsId());
+		QueryWrapper<PopularFeelingsPage> wrapper = new QueryWrapper<>();
+		wrapper.select("id", "popular_feelings_id", "url", "title", "keywords", "description", "spider_time", "update_time");
+		wrapper.eq("popular_feelings_id", param.getPopularFeelingsId());
 
-        Page<PopularFeelingsPage> page = param.getPage();
-        page.addOrder(OrderItem.desc("id"));
+		Page<PopularFeelingsPage> page = param.getPage();
+		page.addOrder(OrderItem.desc("id"));
 
-        Page<PopularFeelingsPage> resultData = this.popularFeelingsPageService.page(page, wrapper);
+		Page<PopularFeelingsPage> resultData = this.popularFeelingsPageService.page(page, wrapper);
 
-        return Result.instance(Result.Code.SUCCESS).setData(resultData);
-    }
+		return Result.instance(Result.Code.SUCCESS).setData(resultData);
+	}
 }

+ 29 - 7
src/main/java/com/zhiqiyun/open/mvc/controller/SentimentSpiderController.java

@@ -8,6 +8,7 @@ import com.zhiqiyun.open.core.models.place.PlaceBaseInfo;
 import com.zhiqiyun.open.core.models.place.PlaceCategory;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderEvent;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderSiteRule;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
 import com.zhiqiyun.open.core.models.user.OauthInfo;
 import com.zhiqiyun.open.core.service.OauthService;
 import com.zhiqiyun.open.core.service.SentimentSpiderEventService;
@@ -62,8 +63,8 @@ public class SentimentSpiderController {
 		return Result.instance(Result.Code.SUCCESS).setData(resultData);
 	}
 
-
 	@ResponseBody
+	@Permission(tags = "通过关键词查询采集网站规则", writeLog = false)
 	@PostMapping("/rule/findSelectByKeyword")
 	public Result findSelectByKeyword(String keyword) {
 		QueryWrapper<SentimentSpiderSiteRule> queryWrapper = new QueryWrapper<>();
@@ -82,7 +83,7 @@ public class SentimentSpiderController {
 
 	@Permission(value = "sentiment.spider.rule.add", tags = "添加采集网站规则")
 	@PostMapping("/rule/save")
-	public Result saveRule(@Valid @RequestBody SaveSpiderSiteRuleParam param) throws Exception {
+	public Result saveRule(@Valid @RequestBody SaveSpiderSiteRuleParam param) {
 
 		SentimentSpiderSiteRule entity = new SentimentSpiderSiteRule();
 		BeanUtils.copyProperties(param, entity);
@@ -125,7 +126,6 @@ public class SentimentSpiderController {
 		return Result.instance(Result.Code.MESSAGE_SUCCESS);
 	}
 
-
 	@Permission(value = "sentiment.spider.event.find", tags = "查询舆情事件")
 	@PostMapping("/event/findPage")
 	public Result findEventPage(@RequestBody QuerySpiderEventParam param) {
@@ -153,8 +153,7 @@ public class SentimentSpiderController {
 		return Result.instance(Result.Code.SUCCESS).setData(resultData);
 	}
 
-
-	@Permission(value = "sentiment.spider.event.add", tags = "添加采集网站规则")
+	@Permission(value = "sentiment.spider.event.add", tags = "添加舆情事件")
 	@PostMapping("/event/save")
 	public Result saveEvent(@Valid @RequestBody SaveSpiderEventParam param) throws Exception {
 
@@ -175,7 +174,7 @@ public class SentimentSpiderController {
 		return Result.instance(Result.Code.MESSAGE_SUCCESS);
 	}
 
-	@Permission(value = "sentiment.spider.event.edit", tags = "修改采集网站规则")
+	@Permission(value = "sentiment.spider.event.edit", tags = "修改舆情事件")
 	@PostMapping("/event/updateById")
 	public Result updateRuleById(Long id, @Valid @RequestBody SaveSpiderEventParam param) {
 		SentimentSpiderEvent entity = new SentimentSpiderEvent();
@@ -192,10 +191,33 @@ public class SentimentSpiderController {
 		return Result.instance(Result.Code.MESSAGE_SUCCESS);
 	}
 
-	@Permission(value = "sentiment.spider.event.delete", tags = "采集网站规则删除")
+	@Permission(value = "sentiment.spider.event.delete", tags = "删除舆情事件")
 	@PostMapping("/event/deleteByIds")
 	public Result deleteEventByIds(@RequestBody List<Long> ids) {
 		this.sentimentSpiderEventService.removeByIds(ids);
 		return Result.instance(Result.Code.MESSAGE_SUCCESS);
 	}
+
+
+	@Permission(value = "sentiment.spider.event.start", tags = "启动舆情事件监控")
+	@PostMapping("/event/startSpider")
+	public Result startSpider(@RequestBody List<Long> ids) {
+		List<SentimentSpiderEvent> listData = this.sentimentSpiderEventService.listByIds(ids);
+		for (SentimentSpiderEvent event : listData) {
+			SentimentSpiderSiteRule rule = this.sentimentSpiderRuleService.getById(event.getSiteRuleId());
+			event.setRule(rule);
+			this.sentimentSpiderEventService.start(event);
+		}
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
+
+	@Permission(value = "sentiment.spider.event.stop", tags = "停止舆情事件监控")
+	@PostMapping("/event/stopSpider")
+	public Result stopSpider(@RequestBody List<Long> ids) {
+		List<SentimentSpiderEvent> listData = this.sentimentSpiderEventService.listByIds(ids);
+		for (SentimentSpiderEvent event : listData) {
+			this.sentimentSpiderEventService.stop(event);
+		}
+		return Result.instance(Result.Code.MESSAGE_SUCCESS);
+	}
 }

+ 3 - 3
src/main/java/com/zhiqiyun/open/router/apis/PopularFeelingsApi.java

@@ -55,7 +55,7 @@ public class PopularFeelingsApi {
 
 		Page<PopularFeelings> resultData = this.popularFeelingsService.page(page, wrapper);
 		for (PopularFeelings popular : resultData.getRecords()) {
-			popular.setStatus(this.popularFeelingsService.getStatus(popular.getId()));
+//			popular.setStatus(this.popularFeelingsService.getStatus(popular.getId()));
 
 			QueryWrapper<PopularFeelingsPage> wrapperPage = new QueryWrapper<>();
 			wrapperPage.eq("popular_feelings_id", popular.getId());
@@ -106,7 +106,7 @@ public class PopularFeelingsApi {
 	public OapResponse startSpider(IdsRequest request) {
 		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(request.getIds());
 		for (PopularFeelings popularFeelings : listData) {
-			this.popularFeelingsService.start(popularFeelings);
+//			this.popularFeelingsService.start(popularFeelings);
 		}
 		return OapResponse.success();
 	}
@@ -115,7 +115,7 @@ public class PopularFeelingsApi {
 	public OapResponse stopSpider(IdsRequest request) {
 		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(request.getIds());
 		for (PopularFeelings popularFeelings : listData) {
-			this.popularFeelingsService.stop(popularFeelings);
+//			this.popularFeelingsService.stop(popularFeelings);
 		}
 		return OapResponse.success();
 	}

+ 40 - 152
src/test/java/com/zhiqiyun/SimplePageProcessor.java

@@ -1,184 +1,72 @@
 package com.zhiqiyun;
 
 import com.alibaba.fastjson.JSON;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsSiteRule;
+import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderSiteRule;
+import com.zhiqiyun.open.core.service.impl.SentimentSpiderEventServiceImpl;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import us.codecraft.webmagic.*;
+import us.codecraft.webmagic.ResultItems;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.Task;
 import us.codecraft.webmagic.pipeline.Pipeline;
-import us.codecraft.webmagic.processor.PageProcessor;
-import us.codecraft.webmagic.selector.Selectable;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
-import java.util.regex.Pattern;
 
 @Slf4j
 public class SimplePageProcessor {
 	public static void main(String[] args) {
-		PopularFeelingsSiteRule rule = new PopularFeelingsSiteRule();
-		rule.setStartUrls(Arrays.asList("https://www.cqn.com.cn/"));
+		SentimentSpiderSiteRule rule = new SentimentSpiderSiteRule();
+		rule.setStartUrls("https://www.cqn.com.cn/");
 		rule.setUrlPatterns("https://www.cqn.com.cn/([a-zA-Z]+)/content/([0-9]+)-([0-9]+)/([0-9]+)/content_([0-9]+).htm");
 		rule.setXpath("/html/body/div[4]/div[1]/div[3]/div[1]");
 
-		Spider spider = Spider.create(new SimplePageProcessor.PopularFeelingsProcessor(rule));
+		String[] startUrlArray = rule.getStartUrls().split("\n");
 
-		List<Pipeline> pipelines = new ArrayList<>();
-		pipelines.add(new Pipeline() {
-			@Override
-			public void process(ResultItems resultItems, Task task) {
-				Map<String, Object> dataMap = resultItems.getAll();
-
-				String url = resultItems.getRequest().getUrl();
-				String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
+		Spider spider = Spider.create(new SentimentSpiderEventServiceImpl.SpiderEventProcessor(rule));
 
-				String title = dataMap.getOrDefault("title", "").toString();
-				String keywords = dataMap.getOrDefault("keywords", "").toString();
-				String description = dataMap.getOrDefault("description", "").toString();
-				String bodyText = dataMap.getOrDefault("bodyText", "").toString();
-				String html = dataMap.getOrDefault("html", "").toString();
-				List<String> listFragments = JSON.parseArray(dataMap.getOrDefault("listFragments", "[]").toString(), String.class);
 
-				String[] listKeywords = new String[]{"十三届五次会", "习近平"};
+		List<Pipeline> pipelines = new ArrayList<>();
+		pipelines.add((resultItems, task) -> {
+			Map<String, Object> dataMap = resultItems.getAll();
+
+			String url = resultItems.getRequest().getUrl();
+			String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
+
+			String title = dataMap.getOrDefault("title", "").toString();
+			String keywords = dataMap.getOrDefault("keywords", "").toString();
+			String description = dataMap.getOrDefault("description", "").toString();
+			String bodyText = dataMap.getOrDefault("bodyText", "").toString();
+			String html = dataMap.getOrDefault("html", "").toString();
+			List<String> listFragments = JSON.parseArray(dataMap.getOrDefault("listFragments", "[]").toString(), String.class);
+
+			String[] listKeywords = new String[]{"十三届五次会", "习近平"};
+
+			boolean flag = true;
+			for (String keyword : listKeywords) {
+				if (!bodyText.contains(keyword)) {
+					flag = false;
+					break;
+				}
+			}
 
-				boolean flag = true;
+			listFragments.removeIf(o -> {
+				boolean hasContains = false;
 				for (String keyword : listKeywords) {
-					if (!bodyText.contains(keyword)) {
-						flag = false;
+					if (o.contains(keyword)) {
+						hasContains = true;
 						break;
 					}
 				}
+				return !hasContains;
+			});
 
-				listFragments.removeIf(o -> {
-					boolean hasContains = false;
-					for (String keyword : listKeywords) {
-						if (o.contains(keyword)) {
-							hasContains = true;
-							break;
-						}
-					}
-					return !hasContains;
-				});
-
-				log.info("{}>>>>>>>>{}>>>>>>>>{}", flag, listFragments.size(), bodyText);
-			}
+			log.info("{}>>>>>>>>{}>>>>>>>>{}", flag, listFragments.size(), bodyText);
 		});
 		spider.setPipelines(pipelines);
 		spider.setExitWhenComplete(true);
-		spider.addUrl(rule.getStartUrls().toArray(new String[]{}));
+		spider.addUrl(startUrlArray);
 		spider.runAsync();
 	}
-
-
-	static class PopularFeelingsProcessor implements PageProcessor {
-		private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
-
-		private final Site site;
-
-		private final PopularFeelingsSiteRule rule;
-
-
-		public PopularFeelingsProcessor(PopularFeelingsSiteRule rule) {
-			this.rule = rule;
-			this.site = Site.me();
-			this.site.setUseGzip(true);
-			this.site.setRetryTimes(3);
-			this.site.setSleepTime(1000);
-		}
-
-		@Override
-		public void process(Page page) {
-			List<String> listUrls = page.getHtml().links().all();
-			listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
-
-			String urlPatterns = this.rule.getUrlPatterns();
-			if (StringUtils.isNotBlank(urlPatterns)) {
-				String[] listPatterns = urlPatterns.split("\n");
-				listUrls.removeIf(url -> {
-					boolean hasMatched = false;
-					for (String regex : listPatterns) {
-						hasMatched = Pattern.matches(regex, url);
-						if (hasMatched) {
-							break;
-						}
-					}
-					return !hasMatched;
-				});
-			}
-
-			LIST_SPIDER_URLS.addAll(listUrls);
-
-			page.addTargetRequests(listUrls);
-
-			Document document = page.getHtml().getDocument();
-
-			String title = document.title();
-			String bodyText = "";
-			String html = document.html();
-
-			String keywords = "";
-			String description = "";
-			Elements elements = document.getElementsByTag("meta");
-
-			if (elements != null && elements.size() > 0) {
-				for (Element element : elements) {
-					String metaName = element.attr("name");
-					String metaContent = element.attr("content");
-					if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
-						keywords = metaContent;
-					}
-					if (StringUtils.equalsIgnoreCase("description", metaName)) {
-						description = metaContent;
-					}
-				}
-			}
-
-			Selectable selectable = page.getHtml().xpath(this.rule.getXpath());
-
-			List<String> listFragments = new ArrayList<>();
-			if (selectable.match()) {
-				document.html(selectable.get());
-				bodyText = document.text();
-
-				Elements allElements = document.children().first().children();
-				for (Element element : allElements) {
-					String text = element.text();
-					if (StringUtils.isNotBlank(text)) {
-						listFragments.add(text.trim());
-					}
-				}
-			}
-
-			if (StringUtils.isBlank(bodyText)) {
-				page.setSkip(true);
-			}
-
-
-			if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
-				description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
-			}
-
-			if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
-				keywords = description;
-			}
-
-			page.putField("title", title);
-			page.putField("bodyText", bodyText);
-			page.putField("html", html);
-			page.putField("keywords", keywords);
-			page.putField("description", description);
-			page.putField("listFragments", JSON.toJSONString(listFragments));
-		}
-
-		@Override
-		public Site getSite() {
-			return this.site;
-		}
-	}
 }