jtoms 4 年之前
父节点
当前提交
a4e1dd7c69
共有 18 个文件被更改,包括 375 次插入36 次删除
  1. 50 11
      pom.xml
  2. 12 0
      src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsPageMapper.java
  3. 7 3
      src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelings.java
  4. 21 0
      src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelingsPage.java
  5. 34 0
      src/main/java/com/zhiqiyun/open/core/schedule/SpiderPopularFeelings.java
  6. 10 0
      src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsPageService.java
  7. 7 0
      src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsService.java
  8. 10 11
      src/main/java/com/zhiqiyun/open/core/service/impl/EquipmentPassengerServiceImpl.java
  9. 3 4
      src/main/java/com/zhiqiyun/open/core/service/impl/PlaceBaseInfoServiceImpl.java
  10. 16 0
      src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsPageServiceImpl.java
  11. 87 0
      src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsServiceImpl.java
  12. 32 0
      src/main/java/com/zhiqiyun/open/mvc/controller/PopularFeelingsController.java
  13. 14 0
      src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsPageParam.java
  14. 5 2
      src/main/java/com/zhiqiyun/open/mvc/params/statistics/SavePopularFeelingsParam.java
  15. 5 2
      src/main/java/com/zhiqiyun/open/utils/ClassScaner.java
  16. 16 2
      src/main/resources/db/migration/V1.0.7__popular_feelings.sql
  17. 2 1
      src/main/resources/logback-spring.xml
  18. 44 0
      src/test/java/com/zhiqiyun/SimplePageProcessor.java

+ 50 - 11
pom.xml

@@ -14,26 +14,26 @@
         <file.encoding>UTF-8</file.encoding>
         <junit.version>4.13.2</junit.version>
 
+        <slf4j.version>1.7.36</slf4j.version>
         <commons-lang3.version>3.12.0</commons-lang3.version>
         <commons-codec.version>1.15</commons-codec.version>
         <ip2region.version>1.7.2</ip2region.version>
 
         <spring.version>5.3.14</spring.version>
-        <spring-boot.version>2.6.2</spring-boot.version>
+        <spring-boot.version>2.6.3</spring-boot.version>
 
         <mysql-connector-java.version>8.0.27</mysql-connector-java.version>
         <mybatis-spring-boot-starter.version>2.2.0</mybatis-spring-boot-starter.version>
         <mybatis.version>3.5.7</mybatis.version>
+        <mybatis-plus-boot.version>3.5.1</mybatis-plus-boot.version>
 
-        <slf4j.version>1.7.32</slf4j.version>
-        <commons-lang3.version>3.12.0</commons-lang3.version>
         <lombok.version>1.18.22</lombok.version>
         <javax.servlet.version>4.0.1</javax.servlet.version>
         <okhttp.version>4.9.3</okhttp.version>
 
 
         <afirma-lib-jmimemagic.version>0.0.6</afirma-lib-jmimemagic.version>
-        <thumbnailator.version>0.4.14</thumbnailator.version>
+        <thumbnailator.version>0.4.16</thumbnailator.version>
 
         <framework.version>1.0.18</framework.version>
         <db-migration.version>1.0.0</db-migration.version>
@@ -100,13 +100,7 @@
         <dependency>
             <groupId>com.baomidou</groupId>
             <artifactId>mybatis-plus-boot-starter</artifactId>
-            <version>3.4.3.4</version>
-        </dependency>
-
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-lang3</artifactId>
-            <version>${commons-lang3.version}</version>
+            <version>${mybatis-plus-boot.version}</version>
         </dependency>
         <dependency>
             <groupId>commons-codec</groupId>
@@ -135,6 +129,51 @@
             <artifactId>thumbnailator</artifactId>
             <version>${thumbnailator.version}</version>
         </dependency>
+        <dependency>
+            <groupId>us.codecraft</groupId>
+            <artifactId>webmagic-core</artifactId>
+            <version>0.7.5</version>
+        </dependency>
+        <dependency>
+            <groupId>us.codecraft</groupId>
+            <artifactId>webmagic-extension</artifactId>
+            <version>0.7.5</version>
+        </dependency>
+
+        <!--        <dependency>-->
+        <!--            <groupId>us.codecraft</groupId>-->
+        <!--            <artifactId>webmagic-core</artifactId>-->
+        <!--            <version>0.6.0</version>-->
+        <!--            <exclusions>-->
+        <!--                <exclusion>-->
+        <!--                    <groupId>org.slf4j</groupId>-->
+        <!--                    <artifactId>slf4j-log4j12</artifactId>-->
+        <!--                </exclusion>-->
+        <!--                <exclusion>-->
+        <!--                    <artifactId>slf4j-api</artifactId>-->
+        <!--                    <groupId>org.slf4j</groupId>-->
+        <!--                </exclusion>-->
+        <!--                <exclusion>-->
+        <!--                    <artifactId>guava</artifactId>-->
+        <!--                    <groupId>com.google.guava</groupId>-->
+        <!--                </exclusion>-->
+        <!--                <exclusion>-->
+        <!--                    <artifactId>commons-lang3</artifactId>-->
+        <!--                    <groupId>org.apache.commons</groupId>-->
+        <!--                </exclusion>-->
+        <!--            </exclusions>-->
+        <!--        </dependency>-->
+        <!--        <dependency>-->
+        <!--            <groupId>us.codecraft</groupId>-->
+        <!--            <artifactId>webmagic-extension</artifactId>-->
+        <!--            <version>0.6.0</version>-->
+        <!--            <exclusions>-->
+        <!--                <exclusion>-->
+        <!--                    <groupId>org.slf4j</groupId>-->
+        <!--                    <artifactId>slf4j-log4j12</artifactId>-->
+        <!--                </exclusion>-->
+        <!--            </exclusions>-->
+        <!--        </dependency>-->
     </dependencies>
     <build>
         <plugins>

+ 12 - 0
src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsPageMapper.java

@@ -0,0 +1,12 @@
+package com.zhiqiyun.open.core.mapper.statistics;
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis-Plus mapper for {@link PopularFeelingsPage} rows.
+ * <p>
+ * Declares no methods of its own; every operation is inherited from {@link BaseMapper}.
+ *
+ * @author jtoms
+ */
+@Mapper
+public interface PopularFeelingsPageMapper extends BaseMapper<PopularFeelingsPage> {
+}

+ 7 - 3
src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelings.java

@@ -3,20 +3,24 @@ package com.zhiqiyun.open.core.models.statistics;
 import com.baomidou.mybatisplus.annotation.FieldStrategy;
 import com.baomidou.mybatisplus.annotation.TableField;
 import com.baomidou.mybatisplus.annotation.TableName;
+import com.zhiqiyun.open.core.typeHandler.FastjsonTypeHandler;
 import lombok.Data;
 
 import java.util.Date;
+import java.util.List;
 
 /**
  * @author jtoms
  */
 @Data
-@TableName(value = "popular_feelings")
+@TableName(value = "popular_feelings", autoResultMap = true)
 public class PopularFeelings {
     private Long id;
-    private String title;
+    private String siteName;
     private String keywords;
-    private String siteUrl;
+    private String domain;
+    @TableField(typeHandler = FastjsonTypeHandler.class)
+    private List<String> startUrls;
 
     @TableField(updateStrategy = FieldStrategy.NEVER)
     private Date createdTime;

+ 21 - 0
src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelingsPage.java

@@ -0,0 +1,21 @@
+package com.zhiqiyun.open.core.models.statistics;
+
+import com.baomidou.mybatisplus.annotation.TableName;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * One collected web page, mapped to the {@code popular_feelings_page} table.
+ *
+ * @author jtoms
+ */
+@Data
+@TableName(value = "popular_feelings_page", autoResultMap = true)
+public class PopularFeelingsPage {
+    // Row identifier (string-typed).
+    private String id;
+    // Identifier of the owning monitoring record.
+    private Long popularFeelingsId;
+    // Address of the collected page.
+    private String url;
+    // Page title.
+    private String title;
+    // Text content of the page.
+    private String bodyText;
+    // Full HTML of the page.
+    private String html;
+    // Time at which the page was collected.
+    private Date spiderTime;
+}

+ 34 - 0
src/main/java/com/zhiqiyun/open/core/schedule/SpiderPopularFeelings.java

@@ -0,0 +1,34 @@
+package com.zhiqiyun.open.core.schedule;
+
+import com.alibaba.fastjson.JSON;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
+import com.zhiqiyun.open.core.service.PopularFeelingsService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+
+/**
+ * Scheduled job that loads every {@link PopularFeelings} record and writes them to the log.
+ */
+@Slf4j
+@Component
+public class SpiderPopularFeelings {
+    @Autowired
+    private PopularFeelingsService popularFeelingsService;
+
+    @Autowired
+    private ThreadPoolTaskExecutor threadPoolTaskExecutor;
+
+    /**
+     * Runs on the cron expression below. The records are fetched on the calling thread;
+     * serialising and logging them is handed to the task executor.
+     */
+    @Scheduled(cron = "0/10 * * * * ?")
+    public void createPeople() {
+        final List<PopularFeelings> records = this.popularFeelingsService.list();
+        this.threadPoolTaskExecutor.execute(
+                () -> log.info("{}>>>>>>{}", records.size(), JSON.toJSONString(records)));
+    }
+
+}

+ 10 - 0
src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsPageService.java

@@ -0,0 +1,10 @@
+package com.zhiqiyun.open.core.service;
+
+import com.baomidou.mybatisplus.extension.service.IService;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
+
+/**
+ * Service contract for {@link PopularFeelingsPage} records.
+ * <p>
+ * Adds nothing beyond the operations inherited from MyBatis-Plus {@link IService}.
+ *
+ * @author jtoms
+ */
+public interface PopularFeelingsPageService extends IService<PopularFeelingsPage> {
+}

+ 7 - 0
src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsService.java

@@ -7,4 +7,11 @@ import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
  * @author jtoms
  */
 public interface PopularFeelingsService extends IService<PopularFeelings> {
+    /**
+     * Starts collection for the given monitoring record.
+     *
+     * @param popular the monitoring record whose pages should be collected
+     */
+    void start(PopularFeelings popular);
+
 }

+ 10 - 11
src/main/java/com/zhiqiyun/open/core/service/impl/EquipmentPassengerServiceImpl.java

@@ -11,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 
-import java.io.Serializable;
 import java.util.Collection;
 
 /**
@@ -19,15 +18,15 @@ import java.util.Collection;
  */
 @Service
 public class EquipmentPassengerServiceImpl extends ServiceImpl<EquipmentPassengerMapper, EquipmentPassenger> implements EquipmentPassengerService {
-	@Autowired
-	private EquipmentPassengerPeopleMapper equipmentPassengerPeopleMapper;
+    @Autowired
+    private EquipmentPassengerPeopleMapper equipmentPassengerPeopleMapper;
 
-	@Override
-	@Transactional(rollbackFor = Exception.class)
-	public boolean removeByIds(Collection<? extends Serializable> idList) {
-		QueryWrapper<EquipmentPassengerPeople> wrapper = new QueryWrapper<>();
-		wrapper.in("equipment_id", idList);
-		this.equipmentPassengerPeopleMapper.delete(wrapper);
-		return super.removeByIds(idList);
-	}
+    /**
+     * Deletes the {@code EquipmentPassengerPeople} rows whose {@code equipment_id} is in
+     * {@code list}, then removes the equipment records themselves, all in one transaction.
+     *
+     * @param list ids of the equipment records to remove
+     * @return the result of the inherited {@code removeByIds}
+     */
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public boolean removeByIds(Collection<?> list) {
+        // With no ids the child delete would be built around an empty IN list, so skip it
+        // and let the inherited implementation decide the outcome.
+        if (list == null || list.isEmpty()) {
+            return super.removeByIds(list);
+        }
+        QueryWrapper<EquipmentPassengerPeople> wrapper = new QueryWrapper<>();
+        wrapper.in("equipment_id", list);
+        this.equipmentPassengerPeopleMapper.delete(wrapper);
+        return super.removeByIds(list);
+    }
 }

+ 3 - 4
src/main/java/com/zhiqiyun/open/core/service/impl/PlaceBaseInfoServiceImpl.java

@@ -12,7 +12,6 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 
-import java.io.Serializable;
 import java.util.Collection;
 import java.util.List;
 
@@ -51,12 +50,12 @@ public class PlaceBaseInfoServiceImpl extends ServiceImpl<PlaceBaseInfoMapper, P
 
     @Override
     @Transactional(rollbackFor = Exception.class)
-    public boolean removeByIds(Collection<? extends Serializable> idList) {
+    public boolean removeByIds(Collection<?> list) {
+        // Removes the PlaceBaseInfoExtend rows whose base_info_id is in the list, then the
+        // base records themselves. With no ids the extend-table delete would be built around
+        // an empty IN list, so skip it and let the inherited implementation decide the outcome.
+        if (list == null || list.isEmpty()) {
+            return super.removeByIds(list);
+        }
 
         QueryWrapper<PlaceBaseInfoExtend> wrapper = new QueryWrapper<>();
-        wrapper.in("base_info_id", idList);
+        wrapper.in("base_info_id", list);
         this.placeBaseInfoExtendMapper.delete(wrapper);
 
-        return super.removeByIds(idList);
+        return super.removeByIds(list);
     }
 }

+ 16 - 0
src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsPageServiceImpl.java

@@ -0,0 +1,16 @@
+package com.zhiqiyun.open.core.service.impl;
+
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsPageMapper;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
+import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+/**
+ * Default {@link PopularFeelingsPageService} implementation.
+ * <p>
+ * Relies entirely on the MyBatis-Plus {@link ServiceImpl} base class bound to
+ * {@link PopularFeelingsPageMapper}; no custom behaviour is defined here.
+ *
+ * @author jtoms
+ */
+@Slf4j
+@Service
+public class PopularFeelingsPageServiceImpl extends ServiceImpl<PopularFeelingsPageMapper, PopularFeelingsPage> implements PopularFeelingsPageService {
+}

+ 87 - 0
src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsServiceImpl.java

@@ -2,13 +2,100 @@ package com.zhiqiyun.open.core.service.impl;
 
 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
 import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsMapper;
+import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsPageMapper;
 import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
 import com.zhiqiyun.open.core.service.PopularFeelingsService;
+import com.zhiqiyun.open.utils.DateUtil;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Document;
+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
+import us.codecraft.webmagic.*;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 /**
  * @author jtoms
  */
+@Slf4j
 @Service
 public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
+
+    @Autowired
+    private PopularFeelingsPageMapper popularFeelingsPageMapper;
+
+    /**
+     * Starts an asynchronous spider seeded with the start URLs of the given record.
+     * <p>
+     * A record without start URLs is skipped with a warning instead of failing, so one
+     * incomplete record does not abort a caller that is iterating over several.
+     *
+     * @param popular the monitoring record to collect pages for
+     */
+    @Override
+    public void start(PopularFeelings popular) {
+        List<String> startUrls = popular.getStartUrls();
+        if (startUrls == null || startUrls.isEmpty()) {
+            log.warn("popular feelings {} has no start urls, spider not started", popular.getId());
+            return;
+        }
+
+        Spider spider = Spider.create(new PopularFeelingsProcessor(popular, this.popularFeelingsPageMapper));
+        List<SpiderListener> listListeners = new ArrayList<>();
+        listListeners.add(new SpiderListener() {
+            @Override
+            public void onSuccess(Request request) {
+                log.info("onSuccess>>>>>>{}", request.getUrl());
+            }
+
+            @Override
+            public void onError(Request request) {
+                // Failures are logged at WARN so they stand out from the success trace.
+                log.warn("onError>>>>>>{}", request.getUrl());
+            }
+        });
+        spider.setSpiderListeners(listListeners);
+        spider.addUrl(startUrls.toArray(new String[0]));
+        // Returns immediately; the crawl continues on the spider's own threads.
+        spider.runAsync();
+    }
+
+    /**
+     * Page processor that queues every not-yet-seen link found on a page and stores the
+     * page itself as a {@link PopularFeelingsPage} row.
+     */
+    static class PopularFeelingsProcessor implements PageProcessor {
+        /**
+         * Links this processor has already queued. A set gives constant-time lookups and also
+         * collapses a link that is repeated within a single page.
+         * NOTE(review): not synchronised; assumes pages are processed one at a time, confirm
+         * before raising the spider's thread count.
+         */
+        private final Set<String> visitedUrls = new HashSet<>();
+
+        private final Site site;
+
+        private final PopularFeelings popular;
+
+        private final PopularFeelingsPageMapper popularFeelingsPageMapper;
+
+        /**
+         * @param popular                   record the crawl belongs to; supplies the site domain and the owner id
+         * @param popularFeelingsPageMapper mapper used to insert collected pages
+         */
+        public PopularFeelingsProcessor(PopularFeelings popular, PopularFeelingsPageMapper popularFeelingsPageMapper) {
+            this.popular = popular;
+            this.site = Site.me().setDomain(this.popular.getDomain()).setRetryTimes(3).setSleepTime(1000);
+            this.popularFeelingsPageMapper = popularFeelingsPageMapper;
+        }
+
+        /**
+         * Queues the page's new links, then inserts the page content.
+         *
+         * @param page the downloaded page
+         */
+        @Override
+        public void process(Page page) {
+            List<String> listUrls = page.getHtml().links().all();
+            listUrls.removeIf(s -> {
+                // Pass the link as an argument so it is never interpreted as a format string.
+                log.info("{}", s);
+                // Set.add returns false for a link that was already recorded.
+                return StringUtils.isBlank(s) || !visitedUrls.add(s);
+            });
+            log.info(">>>>>>>>{}", visitedUrls.size());
+            page.addTargetRequests(listUrls);
+
+            Document document = page.getHtml().getDocument();
+            String uri = document.baseUri();
+            log.info(uri);
+            String title = document.title();
+            // A document without a <body> element (e.g. a frameset page) is stored with empty text.
+            String bodyText = document.body() == null ? "" : document.body().text();
+            String html = document.html();
+            log.info("{}>>>>>>{}", uri, title);
+            PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
+            // NOTE(review): document.id() is the id attribute of the document node, not a
+            // generated key; confirm the configured id strategy supplies a unique value.
+            popularFeelingsPage.setId(document.id());
+            popularFeelingsPage.setPopularFeelingsId(popular.getId());
+            popularFeelingsPage.setUrl(uri);
+            popularFeelingsPage.setTitle(title);
+            popularFeelingsPage.setBodyText(bodyText);
+            popularFeelingsPage.setHtml(html);
+            popularFeelingsPage.setSpiderTime(DateUtil.current());
+            this.popularFeelingsPageMapper.insert(popularFeelingsPage);
+        }
+
+        /**
+         * @return the site settings built in the constructor
+         */
+        @Override
+        public Site getSite() {
+            return this.site;
+        }
+    }
 }

+ 32 - 0
src/main/java/com/zhiqiyun/open/mvc/controller/PopularFeelingsController.java

@@ -5,11 +5,14 @@ import com.baomidou.mybatisplus.core.metadata.OrderItem;
 import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
 import com.zhiqiyun.open.annotation.Permission;
 import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
+import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
 import com.zhiqiyun.open.core.models.user.OauthInfo;
 import com.zhiqiyun.open.core.service.OauthService;
+import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
 import com.zhiqiyun.open.core.service.PopularFeelingsService;
 import com.zhiqiyun.open.core.service.SequenceService;
 import com.zhiqiyun.open.mvc.Result;
+import com.zhiqiyun.open.mvc.params.statistics.QueryPopularFeelingsPageParam;
 import com.zhiqiyun.open.mvc.params.statistics.QueryPopularFeelingsParam;
 import com.zhiqiyun.open.mvc.params.statistics.SavePopularFeelingsParam;
 import com.zhiqiyun.open.utils.DateUtil;
@@ -35,6 +38,10 @@ public class PopularFeelingsController {
     @Autowired
     private PopularFeelingsService popularFeelingsService;
 
+
+    @Autowired
+    private PopularFeelingsPageService popularFeelingsPageService;
+
     @Autowired
     private OauthService oauthService;
 
@@ -112,4 +119,29 @@ public class PopularFeelingsController {
         this.popularFeelingsService.removeByIds(ids);
         return Result.instance(Result.Code.MESSAGE_SUCCESS);
     }
+
+    /**
+     * Loads the records with the given ids and starts collection for each of them.
+     * NOTE(review): the permission value names a delete permission although this endpoint
+     * starts a spider; looks copy-pasted, confirm the intended code.
+     *
+     * @param ids ids of the records to start
+     * @return a success-message result
+     */
+    @Permission(value = "popular.feelings.delete", tags = "启动舆情监控")
+    @PostMapping("/startSpider")
+    public Result startSpider(@RequestBody List<Long> ids) {
+        this.popularFeelingsService.listByIds(ids).forEach(this.popularFeelingsService::start);
+        return Result.instance(Result.Code.MESSAGE_SUCCESS);
+    }
+
+    /**
+     * Returns one page of collected pages for a monitoring record, ordered by id descending.
+     * NOTE(review): the permission value names a delete permission although this endpoint
+     * only reads data; looks copy-pasted, confirm the intended code.
+     *
+     * @param param paging settings plus the id of the owning record
+     * @return a success result carrying the page of rows
+     */
+    @Permission(value = "popular.feelings.delete", tags = "查询舆情监控详情")
+    @PostMapping("/findDetailPage")
+    public Result findDetailPage(@RequestBody QueryPopularFeelingsPageParam param) {
+        Page<PopularFeelingsPage> pageRequest = param.getPage();
+        pageRequest.addOrder(OrderItem.desc("id"));
+
+        QueryWrapper<PopularFeelingsPage> byOwner = new QueryWrapper<>();
+        byOwner.eq("popular_feelings_id", param.getPopularFeelingsId());
+
+        Page<PopularFeelingsPage> found = this.popularFeelingsPageService.page(pageRequest, byOwner);
+        return Result.instance(Result.Code.SUCCESS).setData(found);
+    }
 }

+ 14 - 0
src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsPageParam.java

@@ -0,0 +1,14 @@
+package com.zhiqiyun.open.mvc.params.statistics;
+
+import com.zhiqiyun.open.mvc.params.QueryPageParams;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+/**
+ * Paged query parameters for collected pages; extends {@link QueryPageParams} with the id
+ * of the monitoring record to filter by.
+ *
+ * @author jtoms
+ */
+@EqualsAndHashCode(callSuper = true)
+@Data
+public class QueryPopularFeelingsPageParam extends QueryPageParams {
+    // Id of the monitoring record whose collected pages are requested.
+    private Long popularFeelingsId;
+}

+ 5 - 2
src/main/java/com/zhiqiyun/open/mvc/params/statistics/SavePopularFeelingsParam.java

@@ -2,12 +2,15 @@ package com.zhiqiyun.open.mvc.params.statistics;
 
 import lombok.Data;
 
+import java.util.List;
+
 /**
+ * Parameters carried by a save request for a popular-feelings record: site name, keywords,
+ * domain and the list of start URLs.
+ *
  * @author jtoms
  */
 @Data
 public class SavePopularFeelingsParam {
-    private String title;
+    private String siteName;
     private String keywords;
-    private String siteUrl;
+    private String domain;
+    private List<String> startUrls;
 }

+ 5 - 2
src/main/java/com/zhiqiyun/open/utils/ClassScaner.java

@@ -1,6 +1,7 @@
 package com.zhiqiyun.open.utils;
 
 import org.apache.commons.lang3.ArrayUtils;
+import org.jetbrains.annotations.NotNull;
 import org.springframework.beans.factory.BeanDefinitionStoreException;
 import org.springframework.context.ResourceLoaderAware;
 import org.springframework.core.io.Resource;
@@ -38,6 +39,7 @@ public class ClassScaner implements ResourceLoaderAware {
     private ResourcePatternResolver resourcePatternResolver = new PathMatchingResourcePatternResolver();
     private MetadataReaderFactory metadataReaderFactory = new CachingMetadataReaderFactory(this.resourcePatternResolver);
 
+    @SafeVarargs
     public static Set<Class<?>> scan(String[] basePackages, Class<? extends Annotation>... annotations) {
         ClassScaner cs = new ClassScaner();
 
@@ -55,9 +57,10 @@ public class ClassScaner implements ResourceLoaderAware {
     }
 
     public static Set<Class<?>> scan(String basePackages) {
-        return ClassScaner.scan(StringUtils.tokenizeToStringArray(basePackages, ",; \t\n"), null);
+        // Cast to the array type so the varargs parameter itself is null, exactly as with the
+        // former bare `null` argument. Casting to the element type would instead pass a
+        // one-element array whose only entry is null.
+        return ClassScaner.scan(StringUtils.tokenizeToStringArray(basePackages, ",; \t\n"), (Class<? extends Annotation>[]) null);
     }
 
+    /**
+     * Splits {@code basePackages} on commas, semicolons, spaces, tabs and newlines and
+     * delegates to the {@code String[]} overload with the same annotations.
+     */
+    @SafeVarargs
     public static Set<Class<?>> scan(String basePackages, Class<? extends Annotation>... annotations) {
         return ClassScaner.scan(StringUtils.tokenizeToStringArray(basePackages, ",; \t\n"), annotations);
     }
@@ -67,7 +70,7 @@ public class ClassScaner implements ResourceLoaderAware {
     }
 
     @Override
-    public void setResourceLoader(ResourceLoader resourceLoader) {
+    public void setResourceLoader(@NotNull ResourceLoader resourceLoader) {
+        // Rebuild both helpers from the supplied loader, replacing the instances created by
+        // the field initialisers.
         this.resourcePatternResolver = ResourcePatternUtils.getResourcePatternResolver(resourceLoader);
         this.metadataReaderFactory = new CachingMetadataReaderFactory(resourceLoader);
     }

+ 16 - 2
src/main/resources/db/migration/V1.0.7__popular_feelings.sql

@@ -11,12 +11,26 @@ DROP TABLE IF EXISTS `popular_feelings`;
 CREATE TABLE `popular_feelings`
 (
     `id`           BIGINT(20) NOT NULL COMMENT 'ID',
-    `title`        VARCHAR(50) NOT NULL COMMENT '标题',
+    `site_name`    VARCHAR(50) NOT NULL COMMENT '网站名称',
     `keywords`     VARCHAR(50) NOT NULL COMMENT '监控关键词',
-    `site_url`     VARCHAR(50) NOT NULL COMMENT '网站URL',
+    `domain`       VARCHAR(50) NOT NULL COMMENT '域名',
+    -- start_urls stores a serialized list of URLs (the entity maps it through a type
+    -- handler), so it needs far more room than 50 characters.
+    `start_urls`   TEXT NOT NULL COMMENT '启动域名',
     `created_time` DATETIME NULL DEFAULT NULL COMMENT '创建时间',
     `created_by`   BIGINT(20) NULL DEFAULT NULL COMMENT '创建人',
     `updated_time` DATETIME NULL DEFAULT NULL COMMENT '修改时间',
     `updated_by`   BIGINT(20) NULL DEFAULT NULL COMMENT '修改人',
     PRIMARY KEY (`id`)
 ) COMMENT ='舆情监控管理' ENGINE = InnoDB;
+
+DROP TABLE IF EXISTS `popular_feelings_page`;
+-- Collected pages: one row per stored page, linked to popular_feelings through
+-- popular_feelings_id. The key is a string, unlike the BIGINT key of popular_feelings.
+CREATE TABLE `popular_feelings_page`
+(
+    `id`                  VARCHAR(50) NOT NULL COMMENT 'ID',
+    `popular_feelings_id` BIGINT(20) NOT NULL COMMENT '舆情监控ID',
+    `url`                 VARCHAR(500) NOT NULL COMMENT 'URL地址',
+    `title`               TEXT         NOT NULL COMMENT '标题',
+    `body_text`           LONGTEXT     NOT NULL COMMENT '网页文本',
+    `html`                LONGTEXT     NOT NULL COMMENT '全文本HTML',
+    `spider_time`         DATETIME NULL DEFAULT NULL COMMENT '采集时间',
+    PRIMARY KEY (`id`)
+) COMMENT ='舆情采集页面' ENGINE = InnoDB;

+ 2 - 1
src/main/resources/logback-spring.xml

@@ -17,6 +17,7 @@
     <logger name="io.lettuce" level="INFO" />
     <logger name="com.zaxxer" level="INFO" />
     <logger name="org.ehcache" level="INFO" />
+    <logger name="org.apache.http" level="INFO" />
 
     <property name="LOG_PATH" value="${LOGGER_ROOT_PATH}/${SPRING_APP_NAME}" />
     <property name="MAX_HISTORY" value="10" />
@@ -114,4 +115,4 @@
         <appender-ref ref="FILE_ERROR_LOG" />
         <appender-ref ref="STDOUT" />
     </root>
-</configuration>
+</configuration>

+ 44 - 0
src/test/java/com/zhiqiyun/SimplePageProcessor.java

@@ -0,0 +1,44 @@
+package com.zhiqiyun;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.Spider;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Stand-alone WebMagic sample: starting from one fixed URL it queues every link it has not
+ * recorded before and keeps each page's title as a result field.
+ */
+@Slf4j
+public class SimplePageProcessor implements PageProcessor {
+    /** Links recorded so far, shared by all instances. */
+    private static final List<String> LIST_SPIDER_URLS = new ArrayList<>();
+
+    private final Site site = Site.me().setDomain("www.188420.com").setRetryTimes(3).setSleepTime(1000);
+
+    public SimplePageProcessor() {
+    }
+
+    /**
+     * Queues the unseen, non-blank links of the page and stores its title.
+     */
+    public void process(Page page) {
+        List<String> freshLinks = page.getHtml().links().all();
+        freshLinks.removeIf(link -> StringUtils.isBlank(link) || LIST_SPIDER_URLS.contains(link));
+        LIST_SPIDER_URLS.addAll(freshLinks);
+        log.info(">>>>>>>>{}", LIST_SPIDER_URLS.size());
+        page.addTargetRequests(freshLinks);
+
+        String pageTitle = page.getHtml().getDocument().title();
+        page.putField("title", pageTitle);
+        // Skip the page when no title ended up in the result items.
+        boolean titleMissing = page.getResultItems().get("title") == null;
+        if (titleMissing) {
+            page.setSkip(true);
+        }
+    }
+
+    public Site getSite() {
+        return this.site;
+    }
+
+    /**
+     * Runs the sample crawl synchronously from the fixed start URL.
+     */
+    public static void main(String[] args) {
+        Spider sampleSpider = Spider.create(new SimplePageProcessor());
+        sampleSpider.addUrl("https://www.188420.com");
+        sampleSpider.run();
+    }
+}