jtoms 4 năm trước cách đây
mục cha
commit
5054ac5542
20 tập tin đã thay đổi với 293 bổ sung và 635 xóa
  1. 10 0
      pom.xml
  2. 0 12
      src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsMapper.java
  3. 0 12
      src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsPageMapper.java
  4. 0 39
      src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelings.java
  5. 0 27
      src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelingsPage.java
  6. 0 10
      src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsPageService.java
  7. 0 31
      src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsService.java
  8. 0 16
      src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsPageServiceImpl.java
  9. 0 220
      src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsServiceImpl.java
  10. 18 1
      src/main/java/com/zhiqiyun/open/core/service/impl/SentimentSpiderEventServiceImpl.java
  11. 153 0
      src/main/java/com/zhiqiyun/open/core/spiders/OkHttpDownloader.java
  12. 58 0
      src/main/java/com/zhiqiyun/open/core/spiders/PhantomjsDownloader.java
  13. 0 166
      src/main/java/com/zhiqiyun/open/mvc/controller/PopularFeelingsController.java
  14. 0 14
      src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsPageParam.java
  15. 0 16
      src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsParam.java
  16. 0 24
      src/main/java/com/zhiqiyun/open/mvc/params/statistics/SavePopularFeelingsParam.java
  17. 2 2
      src/main/java/com/zhiqiyun/open/router/apis/SentimentSpiderApi.java
  18. 2 0
      src/main/resources/application.properties
  19. 1 0
      src/main/resources/logback-spring.xml
  20. 49 45
      src/test/java/com/zhiqiyun/SimplePageProcessor.java

+ 10 - 0
pom.xml

@@ -146,6 +146,16 @@
             <artifactId>webmagic-extension</artifactId>
             <version>${webmagic.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.seleniumhq.selenium</groupId>
+            <artifactId>selenium-java</artifactId>
+            <version>4.1.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.codeborne</groupId>
+            <artifactId>phantomjsdriver</artifactId>
+            <version>1.5.0</version>
+        </dependency>
     </dependencies>
     <build>
         <plugins>

+ 0 - 12
src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsMapper.java

@@ -1,12 +0,0 @@
-package com.zhiqiyun.open.core.mapper.statistics;
-
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
-import org.apache.ibatis.annotations.Mapper;
-
-/**
- * @author jtoms
- */
-@Mapper
-public interface PopularFeelingsMapper extends BaseMapper<PopularFeelings> {
-}

+ 0 - 12
src/main/java/com/zhiqiyun/open/core/mapper/statistics/PopularFeelingsPageMapper.java

@@ -1,12 +0,0 @@
-package com.zhiqiyun.open.core.mapper.statistics;
-
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
-import org.apache.ibatis.annotations.Mapper;
-
-/**
- * @author jtoms
- */
-@Mapper
-public interface PopularFeelingsPageMapper extends BaseMapper<PopularFeelingsPage> {
-}

+ 0 - 39
src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelings.java

@@ -1,39 +0,0 @@
-package com.zhiqiyun.open.core.models.statistics;
-
-import com.baomidou.mybatisplus.annotation.FieldStrategy;
-import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableName;
-import com.zhiqiyun.open.core.typeHandler.FastjsonTypeHandler;
-import lombok.Data;
-
-import java.util.Date;
-import java.util.List;
-
-/**
- * @author jtoms
- */
-@Data
-@TableName(value = "popular_feelings", autoResultMap = true)
-public class PopularFeelings {
-    private Long id;
-    private String siteName;
-    private String keywords;
-    private String domain;
-    private String urlPatterns;
-
-    @TableField(typeHandler = FastjsonTypeHandler.class)
-    private List<String> startUrls;
-
-    @TableField(updateStrategy = FieldStrategy.NEVER)
-    private Date createdTime;
-    @TableField(updateStrategy = FieldStrategy.NEVER)
-    private Long createdBy;
-
-    private Date updatedTime;
-    private Long updatedBy;
-
-    @TableField(exist = false)
-    private Integer status;
-    @TableField(exist = false)
-    private Long spiderCount;
-}

+ 0 - 27
src/main/java/com/zhiqiyun/open/core/models/statistics/PopularFeelingsPage.java

@@ -1,27 +0,0 @@
-package com.zhiqiyun.open.core.models.statistics;
-
-import com.baomidou.mybatisplus.annotation.FieldStrategy;
-import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableName;
-import lombok.Data;
-
-import java.util.Date;
-
-/**
- * @author jtoms
- */
-@Data
-@TableName(value = "popular_feelings_page", autoResultMap = true)
-public class PopularFeelingsPage {
-    private String id;
-    private Long popularFeelingsId;
-    private String url;
-    private String title;
-    private String keywords;
-    private String description;
-    private String bodyText;
-    private String html;
-    @TableField(updateStrategy = FieldStrategy.NEVER)
-    private Date spiderTime;
-    private Date updateTime;
-}

+ 0 - 10
src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsPageService.java

@@ -1,10 +0,0 @@
-package com.zhiqiyun.open.core.service;
-
-import com.baomidou.mybatisplus.extension.service.IService;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
-
-/**
- * @author jtoms
- */
-public interface PopularFeelingsPageService extends IService<PopularFeelingsPage> {
-}

+ 0 - 31
src/main/java/com/zhiqiyun/open/core/service/PopularFeelingsService.java

@@ -1,31 +0,0 @@
-package com.zhiqiyun.open.core.service;
-
-import com.baomidou.mybatisplus.extension.service.IService;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
-
-/**
- * @author jtoms
- */
-public interface PopularFeelingsService extends IService<PopularFeelings> {
-    /**
-     * 启动采集
-     *
-     * @param popular
-     */
-//    void start(PopularFeelings popular);
-
-    /**
-     * 停止采集
-     *
-     * @param popular
-     */
-//    void stop(PopularFeelings popular);
-
-    /**
-     * 查询运行状态
-     *
-     * @param popularFeelingsId
-     */
-//    int getStatus(Long popularFeelingsId);
-
-}

+ 0 - 16
src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsPageServiceImpl.java

@@ -1,16 +0,0 @@
-package com.zhiqiyun.open.core.service.impl;
-
-import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
-import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsPageMapper;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
-import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.stereotype.Service;
-
-/**
- * @author jtoms
- */
-@Slf4j
-@Service
-public class PopularFeelingsPageServiceImpl extends ServiceImpl<PopularFeelingsPageMapper, PopularFeelingsPage> implements PopularFeelingsPageService {
-}

+ 0 - 220
src/main/java/com/zhiqiyun/open/core/service/impl/PopularFeelingsServiceImpl.java

@@ -1,220 +0,0 @@
-package com.zhiqiyun.open.core.service.impl;
-
-import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
-import com.zhiqiyun.open.core.mapper.statistics.PopularFeelingsMapper;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
-import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
-import com.zhiqiyun.open.core.service.PopularFeelingsService;
-import com.zhiqiyun.open.utils.DateUtil;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.stereotype.Service;
-import us.codecraft.webmagic.*;
-import us.codecraft.webmagic.pipeline.Pipeline;
-import us.codecraft.webmagic.processor.PageProcessor;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.regex.Pattern;
-
-/**
- * @author jtoms
- */
-@Slf4j
-@Service
-public class PopularFeelingsServiceImpl extends ServiceImpl<PopularFeelingsMapper, PopularFeelings> implements PopularFeelingsService {
-
-//    @Autowired
-//    private PopularFeelingsPageService popularFeelingsPageService;
-//
-//    private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
-//
-//    @Override
-//    public void start(PopularFeelings popular) {
-//        String[] urls = popular.getStartUrls().toArray(new String[]{});
-//
-//        Spider spider = Spider.create(new PopularFeelingsProcessor(popular));
-//        List<SpiderListener> listListeners = new ArrayList<>();
-//        listListeners.add(new SpiderListener() {
-//            @Override
-//            public void onSuccess(Request request) {
-//                log.info("onSuccess>>>>>>{}", request.getUrl());
-//            }
-//
-//            @Override
-//            public void onError(Request request) {
-//                log.info("onError>>>>>>{}", request.getUrl());
-//            }
-//        });
-//        List<Pipeline> pipelines = new ArrayList<>();
-//        pipelines.add(new Pipeline() {
-//            @Override
-//            public void process(ResultItems resultItems, Task task) {
-//                Map<String, Object> dataMap = resultItems.getAll();
-//                log.info(resultItems.getRequest().getUrl());
-//                String url = resultItems.getRequest().getUrl();
-//                String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
-//
-//                String title = dataMap.getOrDefault("title", "").toString();
-//                String keywords = dataMap.getOrDefault("keywords", "").toString();
-//                String description = dataMap.getOrDefault("description", "").toString();
-//                String bodyText = dataMap.getOrDefault("bodyText", "").toString();
-//                String html = dataMap.getOrDefault("html", "").toString();
-//
-//                if (StringUtils.isBlank(popular.getKeywords())) {
-//                    return;
-//                }
-//
-//                String[] listKeywords = popular.getKeywords().split(",");
-//
-//                boolean flag = true;
-//                for (String k : listKeywords) {
-//                    if (!bodyText.contains(k)) {
-//                        flag = false;
-//                        break;
-//                    }
-//                }
-//
-//                if (flag) {
-//                    PopularFeelingsPage popularFeelingsPage = new PopularFeelingsPage();
-//
-//                    popularFeelingsPage.setId(id);
-//                    popularFeelingsPage.setPopularFeelingsId(popular.getId());
-//                    popularFeelingsPage.setUrl(url);
-//                    popularFeelingsPage.setTitle(title);
-//                    popularFeelingsPage.setKeywords(keywords);
-//                    popularFeelingsPage.setDescription(description);
-//                    popularFeelingsPage.setBodyText(bodyText);
-//                    popularFeelingsPage.setHtml(html);
-//                    popularFeelingsPage.setSpiderTime(DateUtil.current());
-//                    popularFeelingsPage.setUpdateTime(DateUtil.current());
-//                    popularFeelingsPageService.saveOrUpdate(popularFeelingsPage);
-//                }
-//            }
-//        });
-//        spider.setPipelines(pipelines);
-//        spider.setExitWhenComplete(true);
-//        spider.setSpiderListeners(listListeners);
-//        spider.addUrl(urls);
-//        spider.runAsync();
-//
-//        SPIDER_RUNNING_MAP.put(popular.getId(), spider);
-//    }
-//
-//    @Override
-//    public void stop(PopularFeelings popular) {
-//        Spider spider = SPIDER_RUNNING_MAP.get(popular.getId());
-//        if (spider != null && spider.getStatus().equals(Spider.Status.Running)) {
-//            spider.stop();
-//        }
-//    }
-//
-//    @Override
-//    public int getStatus(Long popularFeelingsId) {
-//        Spider spider = SPIDER_RUNNING_MAP.get(popularFeelingsId);
-//        if (spider == null) {
-//            return 0;
-//        } else if (spider.getStatus().equals(Spider.Status.Running)) {
-//            return 1;
-//        } else if (spider.getStatus().equals(Spider.Status.Stopped)) {
-//            return 2;
-//        } else {
-//            return 0;
-//        }
-//    }
-//
-//    static class PopularFeelingsProcessor implements PageProcessor {
-//        private final List<String> LIST_SPIDER_URLS = new ArrayList<>();
-//
-//        private final Site site;
-//
-//        private final PopularFeelings popular;
-//
-//
-//        public PopularFeelingsProcessor(PopularFeelings popular) {
-//            this.popular = popular;
-//
-//            this.site = Site.me();
-//            this.site.setDomain(this.popular.getDomain());
-//            this.site.setUseGzip(true);
-//            this.site.setRetryTimes(3);
-//            this.site.setSleepTime(1000);
-//        }
-//
-//        @Override
-//        public void process(Page page) {
-//            List<String> listUrls = page.getHtml().links().all();
-//            listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
-//
-//            String urlPatterns = this.popular.getUrlPatterns();
-//            if (StringUtils.isNotBlank(urlPatterns)) {
-//                String[] listPatterns = urlPatterns.split("\n");
-//                listUrls.removeIf(url -> {
-//                    boolean hasMatched = false;
-//                    for (String regex : listPatterns) {
-//                        hasMatched = Pattern.matches(regex, url);
-//                        if (hasMatched) {
-//                            break;
-//                        }
-//                    }
-//                    return !hasMatched;
-//                });
-//            }
-//
-//            LIST_SPIDER_URLS.addAll(listUrls);
-//
-//            page.addTargetRequests(listUrls);
-//
-//            Document document = page.getHtml().getDocument();
-//
-//            String title = document.title();
-//            String bodyText = document.text();
-//            String html = document.html();
-//
-//            String keywords = "";
-//            String description = "";
-//            Elements elements = document.getElementsByTag("meta");
-//
-//            if (elements != null && elements.size() > 0) {
-//                for (Element element : elements) {
-//                    String metaName = element.attr("name");
-//                    String metaContent = element.attr("content");
-//                    if (StringUtils.equalsIgnoreCase("keywords", metaName)) {
-//                        keywords = metaContent;
-//                    }
-//                    if (StringUtils.equalsIgnoreCase("description", metaName)) {
-//                        description = metaContent;
-//                    }
-//                }
-//            }
-//
-//            if (StringUtils.isBlank(description) && StringUtils.isNotBlank(bodyText)) {
-//                description = bodyText.length() >= 200 ? bodyText.substring(0, 200) : bodyText;
-//            }
-//
-//            if (StringUtils.isBlank(keywords) && StringUtils.isNotBlank(description)) {
-//                keywords = description;
-//            }
-//
-//
-//            page.putField("title", title);
-//            page.putField("bodyText", bodyText);
-//            page.putField("html", html);
-//            page.putField("keywords", keywords);
-//            page.putField("description", description);
-//        }
-//
-//        @Override
-//        public Site getSite() {
-//            return this.site;
-//        }
-//    }
-}

+ 18 - 1
src/main/java/com/zhiqiyun/open/core/service/impl/SentimentSpiderEventServiceImpl.java

@@ -8,16 +8,22 @@ import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderResult;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderSiteRule;
 import com.zhiqiyun.open.core.service.SentimentSpiderEventService;
 import com.zhiqiyun.open.core.service.SentimentSpiderResultService;
+import com.zhiqiyun.open.core.spiders.OkHttpDownloader;
 import com.zhiqiyun.open.utils.DateUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.NotNull;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
+import org.springframework.beans.BeansException;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.ApplicationContextAware;
 import org.springframework.stereotype.Service;
 import us.codecraft.webmagic.*;
+import us.codecraft.webmagic.downloader.Downloader;
 import us.codecraft.webmagic.pipeline.Pipeline;
 import us.codecraft.webmagic.processor.PageProcessor;
 import us.codecraft.webmagic.selector.Selectable;
@@ -30,13 +36,20 @@ import java.util.regex.Pattern;
 
 @Slf4j
 @Service
-public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpiderEventMapper, SentimentSpiderEvent> implements SentimentSpiderEventService {
+public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpiderEventMapper, SentimentSpiderEvent> implements SentimentSpiderEventService, ApplicationContextAware {
 
     private static final ConcurrentHashMap<Long, Spider> SPIDER_RUNNING_MAP = new ConcurrentHashMap<>();
 
     @Autowired
     private SentimentSpiderResultService sentimentSpiderResultService;
 
+    private ApplicationContext applicationContext;
+
+
+    @Override
+    public void setApplicationContext(@NotNull ApplicationContext applicationContext) throws BeansException {
+        this.applicationContext = applicationContext;
+    }
 
     @Override
     public void start(SentimentSpiderEvent event) {
@@ -55,6 +68,8 @@ public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpider
         pipelines.add(new SpiderEventPipeline(keywords, event.getId(), event.getSiteRuleId(), this.sentimentSpiderResultService));
         spider.setPipelines(pipelines);
         spider.setExitWhenComplete(true);
+        Downloader downloader = this.applicationContext.getBean(OkHttpDownloader.class);
+        spider.setDownloader(downloader);
         spider.addUrl(urlArray);
         spider.runAsync();
 
@@ -83,6 +98,7 @@ public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpider
         }
     }
 
+
     public static class SpiderEventPipeline implements Pipeline {
 
         private final String[] keywords;
@@ -175,6 +191,7 @@ public class SentimentSpiderEventServiceImpl extends ServiceImpl<SentimentSpider
         @Override
         public void process(Page page) {
             List<String> listUrls = page.getHtml().links().all();
+
             listUrls.removeIf(url -> StringUtils.isBlank(url) || LIST_SPIDER_URLS.contains(url));
 
             String urlPatterns = this.rule.getUrlPatterns();

+ 153 - 0
src/main/java/com/zhiqiyun/open/core/spiders/OkHttpDownloader.java

@@ -0,0 +1,153 @@
+package com.zhiqiyun.open.core.spiders;
+
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.OkHttpClient;
+import okhttp3.Response;
+import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpResponse;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.downloader.Downloader;
+import us.codecraft.webmagic.selector.PlainText;
+import us.codecraft.webmagic.utils.CharsetUtils;
+import us.codecraft.webmagic.utils.HttpClientUtils;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+/**
+ * WebMagic {@link Downloader} that fetches pages through the injected {@link OkHttpClient}.
+ *
+ * <p>Registered as the Spring bean {@code okhttp.downloader}.
+ */
+@Slf4j
+@Component("okhttp.downloader")
+public class OkHttpDownloader implements Downloader {
+
+    @Autowired
+    private OkHttpClient okHttpClient;
+
+    /**
+     * Performs a blocking HTTP GET for the request URL and wraps the response in a {@link Page}.
+     *
+     * <p>The body is read as raw bytes and decoded with the charset configured on the task's
+     * site when there is one, otherwise with the charset detected by
+     * {@link #getHtmlCharset(String, byte[])}. The page bytes are therefore the bytes actually
+     * received, not a re-encoding of the decoded text.
+     *
+     * @param request the crawl request; its URL is fetched with GET
+     * @param task    the running task; may be {@code null}, in which case the charset is detected
+     * @return the downloaded page, or a page flagged as not successfully downloaded if anything
+     *         failed (the failure is logged, never thrown)
+     */
+    @Override
+    public Page download(Request request, Task task) {
+        Page page = new Page();
+        try {
+            log.info(request.getUrl());
+
+            // Set first so that a failed page still identifies the request it belongs to.
+            page.setUrl(new PlainText(request.getUrl()));
+            page.setRequest(request);
+
+            okhttp3.Request httpRequest = new okhttp3.Request.Builder()
+                    .url(request.getUrl())
+                    .get()
+                    .build();
+
+            String charset = (task != null && task.getSite() != null) ? task.getSite().getCharset() : null;
+
+            // try-with-resources releases the response body and its connection on every path.
+            try (Response resp = this.okHttpClient.newCall(httpRequest).execute()) {
+                byte[] bytes = resp.body() == null ? new byte[0] : resp.body().bytes();
+                if (charset == null) {
+                    charset = this.getHtmlCharset(resp.header("Content-Type", ""), bytes);
+                }
+
+                page.setBytes(bytes);
+                page.setCharset(charset);
+                page.setRawText(new String(bytes, charset));
+                page.setStatusCode(resp.code());
+                page.setHeaders(resp.headers().toMultimap());
+                page.setDownloadSuccess(true);
+            }
+        } catch (Exception e) {
+            log.error("", e);
+            // Flag the failure explicitly: previously a bare Page was returned here without
+            // ever being marked unsuccessful.
+            page.setDownloadSuccess(false);
+        }
+        return page;
+    }
+
+    /**
+     * Only logs the requested thread count; this downloader keeps no thread-dependent state.
+     *
+     * @param i the thread count passed in by the caller
+     */
+    @Override
+    public void setThread(int i) {
+        log.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>{}", i);
+    }
+
+
+    /**
+     * Builds a {@link Page} from an Apache HttpClient response.
+     *
+     * <p>Not called by {@link #download(Request, Task)}, which uses OkHttp.
+     *
+     * @param request      the crawl request the response belongs to
+     * @param charset      charset used to decode the body, or {@code null} to detect it
+     * @param httpResponse the Apache HttpClient response to convert
+     * @param task         the running task (not used by this method)
+     * @return a page carrying the body bytes, decoded text, status code and headers
+     * @throws IOException if the body cannot be read or the charset is not supported
+     */
+    protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
+        byte[] bytes = IOUtils.toByteArray(httpResponse.getEntity().getContent());
+        String contentType = httpResponse.getEntity().getContentType() == null ? "" : httpResponse.getEntity().getContentType().getValue();
+        Page page = new Page();
+        page.setBytes(bytes);
+        if (charset == null) {
+            charset = this.getHtmlCharset(contentType, bytes);
+        }
+
+        page.setCharset(charset);
+        page.setRawText(new String(bytes, charset));
+
+        page.setUrl(new PlainText(request.getUrl()));
+        page.setRequest(request);
+        page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
+        page.setDownloadSuccess(true);
+        page.setHeaders(HttpClientUtils.convertHeaders(httpResponse.getAllHeaders()));
+
+
+        return page;
+    }
+
+    /**
+     * Detects the charset of a response, falling back to the JVM default charset (with a
+     * warning) when detection yields nothing.
+     *
+     * @param contentType  the Content-Type header value; empty string when absent
+     * @param contentBytes the raw response body
+     * @return the name of the charset to decode the body with, never {@code null}
+     * @throws IOException if charset detection fails while reading the content
+     */
+    private String getHtmlCharset(String contentType, byte[] contentBytes) throws IOException {
+        String charset = CharsetUtils.detectCharset(contentType, contentBytes);
+        if (charset == null) {
+            charset = Charset.defaultCharset().name();
+            log.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
+        }
+
+        return charset;
+    }
+}
+/**
+ * package util;
+ * <p>
+ * import java.io.BufferedReader;
+ * import java.io.File;
+ * import java.io.FileWriter;
+ * import java.io.InputStream;
+ * import java.io.InputStreamReader;
+ * import java.io.PrintWriter;
+ * <p>
+ * import us.codecraft.webmagic.Page;
+ * import us.codecraft.webmagic.Request;
+ * import us.codecraft.webmagic.selector.PlainText;
+ * <p>
+ * public class GetAjaxHtml {
+ * public static String getAjaxContent(String url) throws Exception {
+ * Runtime rt = Runtime.getRuntime();
+ * Process p = rt
+ * .exec("D:/phantomjs-2.1.1-windows/bin/phantomjs.exe D:/s.js "
+ * + url);
+ * InputStream is = p.getInputStream();
+ * BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ * StringBuffer sbf = new StringBuffer();
+ * String tmp = "";
+ * while ((tmp = br.readLine()) != null) {
+ * sbf.append(tmp + "\n");
+ * }
+ * return sbf.toString();
+ * }
+ * <p>
+ * public static Page download(Request request) {
+ * Page page = new Page();
+ * try {
+ * String url = request.getUrl();
+ * String html = getAjaxContent(url);
+ * page.setRawText(html);
+ * page.setUrl(new PlainText(url));
+ * page.setRequest(request);
+ * return page;
+ * } catch (Exception e) {
+ * System.out.println("download出错了!");
+ * return page;
+ * }
+ * }
+ * <p>
+ * public static void main(String[] args) throws Exception {
+ * long start = System.currentTimeMillis();
+ * String result = getAjaxContent("http://www.taobao.com");
+ * System.out.println(result);
+ * // 创建新文件
+ * String path = "D:\\testFile\\taobao.html";
+ * PrintWriter printWriter = null;
+ * printWriter = new PrintWriter(new FileWriter(new File(path)));
+ * printWriter.write(result);
+ * printWriter.close();
+ * long end = System.currentTimeMillis();
+ * System.out.println("===============耗时:" + (end - start)
+ * + "===============");
+ * }
+ * }
+ */

+ 58 - 0
src/main/java/com/zhiqiyun/open/core/spiders/PhantomjsDownloader.java

@@ -0,0 +1,58 @@
+package com.zhiqiyun.open.core.spiders;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.phantomjs.PhantomJSDriver;
+import org.openqa.selenium.phantomjs.PhantomJSDriverService;
+import org.openqa.selenium.remote.DesiredCapabilities;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Request;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.downloader.Downloader;
+import us.codecraft.webmagic.selector.PlainText;
+
+import java.util.logging.Level;
+
+/**
+ * WebMagic {@link Downloader} that loads each page in a PhantomJS browser through Selenium and
+ * returns the page source reported by the driver.
+ *
+ * <p>Registered as the Spring bean {@code phantomjs.downloader}. The PhantomJS executable path
+ * comes from the {@code spider.phantomjs_executable_path_property} property.
+ */
+@Slf4j
+@Component("phantomjs.downloader")
+public class PhantomjsDownloader implements Downloader {
+
+    @Value("${spider.phantomjs_executable_path_property}")
+    private String phantomjsPath;
+
+    /**
+     * Starts a PhantomJS driver, loads the request URL and wraps the page source in a
+     * {@link Page}. The driver is always quit before returning.
+     *
+     * @param request the crawl request; its URL is opened in the browser
+     * @param task    the running task (not used by this method)
+     * @return the loaded page, or a page flagged as not successfully downloaded if anything
+     *         failed (the failure is logged, never thrown)
+     */
+    @Override
+    public Page download(Request request, Task task) {
+        PhantomJSDriver driver = null;
+        try {
+            log.info(request.getUrl());
+            DesiredCapabilities caps = new DesiredCapabilities();
+            caps.setCapability(PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY, phantomjsPath);
+            caps.setJavascriptEnabled(true); // enabled by default
+            caps.setCapability("phantomjs.page.settings.loadImages", false);
+            caps.setCapability("phantomjs.page.settings.userAgent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36");
+            driver = new PhantomJSDriver(caps);
+
+            driver.setLogLevel(Level.OFF);
+            driver.get(request.getUrl());
+
+            String rawText = driver.getPageSource();
+
+            Page page = new Page();
+            // Explicit charset so the bytes do not depend on the platform default encoding.
+            page.setBytes(rawText.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+            page.setCharset("UTF-8");
+            page.setRawText(rawText);
+            page.setUrl(new PlainText(request.getUrl()));
+            page.setRequest(request);
+            page.setDownloadSuccess(true);
+            return page;
+        } catch (Exception e) {
+            log.error("", e);
+        } finally {
+            // One driver (and one PhantomJS process) is started per download, so it must be
+            // shut down here or every request leaks a process.
+            if (driver != null) {
+                try {
+                    driver.quit();
+                } catch (Exception e) {
+                    log.warn("Failed to quit PhantomJS driver", e);
+                }
+            }
+        }
+
+        // Return a failed page rather than null so the caller can check the success flag.
+        Page failed = new Page();
+        failed.setRequest(request);
+        failed.setDownloadSuccess(false);
+        return failed;
+    }
+
+    /**
+     * Only logs the requested thread count; this downloader keeps no thread-dependent state.
+     *
+     * @param i the thread count passed in by the caller
+     */
+    @Override
+    public void setThread(int i) {
+        log.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>{}", i);
+    }
+
+}

+ 0 - 166
src/main/java/com/zhiqiyun/open/mvc/controller/PopularFeelingsController.java

@@ -1,166 +0,0 @@
-package com.zhiqiyun.open.mvc.controller;
-
-import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
-import com.baomidou.mybatisplus.core.metadata.OrderItem;
-import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
-import com.zhiqiyun.open.annotation.Permission;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelings;
-import com.zhiqiyun.open.core.models.statistics.PopularFeelingsPage;
-import com.zhiqiyun.open.core.models.user.OauthInfo;
-import com.zhiqiyun.open.core.service.OauthService;
-import com.zhiqiyun.open.core.service.PopularFeelingsPageService;
-import com.zhiqiyun.open.core.service.PopularFeelingsService;
-import com.zhiqiyun.open.core.service.SequenceService;
-import com.zhiqiyun.open.mvc.Result;
-import com.zhiqiyun.open.mvc.params.statistics.QueryPopularFeelingsPageParam;
-import com.zhiqiyun.open.mvc.params.statistics.QueryPopularFeelingsParam;
-import com.zhiqiyun.open.mvc.params.statistics.SavePopularFeelingsParam;
-import com.zhiqiyun.open.utils.DateUtil;
-import com.zhiqiyun.open.utils.ServletContext;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.beans.BeanUtils;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.RequestBody;
-import org.springframework.web.bind.annotation.RequestMapping;
-import org.springframework.web.bind.annotation.RestController;
-
-import javax.validation.Valid;
-import java.util.List;
-
-/**
- * @author jtoms
- */
-@RestController
-@RequestMapping("/popular/feelings")
-public class PopularFeelingsController {
-
-	@Autowired
-	private PopularFeelingsService popularFeelingsService;
-
-	@Autowired
-	private PopularFeelingsPageService popularFeelingsPageService;
-
-	@Autowired
-	private OauthService oauthService;
-
-	@Autowired
-	private SequenceService sequenceService;
-
-
-	@Permission(value = "popular.feelings.find", tags = "查询舆情监控")
-	@PostMapping("/findPage")
-	public Result findPage(@RequestBody QueryPopularFeelingsParam param) {
-
-		QueryWrapper<PopularFeelings> wrapper = new QueryWrapper<>();
-
-		if (StringUtils.isNotBlank(param.getTitle())) {
-			wrapper.like("title", param.getTitle());
-		}
-
-		if (StringUtils.isNotBlank(param.getKeywords())) {
-			wrapper.like("keywords", param.getKeywords());
-		}
-
-		if (StringUtils.isNotBlank(param.getSiteUrl())) {
-			wrapper.like("site_url", param.getSiteUrl());
-		}
-
-		Page<PopularFeelings> page = param.getPage();
-		page.addOrder(OrderItem.desc("id"));
-
-		Page<PopularFeelings> resultData = this.popularFeelingsService.page(page, wrapper);
-		for (PopularFeelings popular : resultData.getRecords()) {
-//            popular.setStatus(this.popularFeelingsService.getStatus(popular.getId()));
-
-			QueryWrapper<PopularFeelingsPage> wrapperPage = new QueryWrapper<>();
-			wrapperPage.eq("popular_feelings_id", popular.getId());
-
-			long spiderCount = this.popularFeelingsPageService.count(wrapperPage);
-			popular.setSpiderCount(spiderCount);
-		}
-
-		return Result.instance(Result.Code.SUCCESS).setData(resultData);
-	}
-
-	@Permission(value = "popular.feelings.add", tags = "新建舆情监控")
-	@PostMapping("/save")
-	public Result save(@Valid @RequestBody SavePopularFeelingsParam param) throws Exception {
-
-		PopularFeelings entity = new PopularFeelings();
-		BeanUtils.copyProperties(param, entity);
-
-		OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
-
-		entity.setCreatedTime(DateUtil.current());
-		entity.setCreatedBy(oauthInfo.getId());
-
-		entity.setUpdatedTime(DateUtil.current());
-		entity.setUpdatedBy(oauthInfo.getId());
-		entity.setId(this.sequenceService.nextId());
-
-		this.popularFeelingsService.save(entity);
-
-		return Result.instance(Result.Code.MESSAGE_SUCCESS);
-	}
-
-	@Permission(value = "popular.feelings.edit", tags = "更新舆情监控")
-	@PostMapping("/updateById")
-	public Result updateById(Long id, @Valid @RequestBody SavePopularFeelingsParam param) {
-		PopularFeelings entity = new PopularFeelings();
-		BeanUtils.copyProperties(param, entity);
-
-		OauthInfo oauthInfo = this.oauthService.getAuth(ServletContext.getAccessToken());
-
-		entity.setUpdatedBy(oauthInfo.getId());
-		entity.setUpdatedTime(DateUtil.current());
-		entity.setId(id);
-
-		this.popularFeelingsService.updateById(entity);
-
-		return Result.instance(Result.Code.MESSAGE_SUCCESS);
-	}
-
-	@Permission(value = "popular.feelings.delete", tags = "删除舆情监控")
-	@PostMapping("/deleteByIds")
-	public Result deleteByIds(@RequestBody List<Long> ids) {
-		this.popularFeelingsService.removeByIds(ids);
-		return Result.instance(Result.Code.MESSAGE_SUCCESS);
-	}
-
-	@Permission(value = "popular.feelings.start", tags = "启动舆情监控")
-	@PostMapping("/startSpider")
-	public Result startSpider(@RequestBody List<Long> ids) {
-		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
-		for (PopularFeelings popularFeelings : listData) {
-//            this.popularFeelingsService.start(popularFeelings);
-		}
-		return Result.instance(Result.Code.MESSAGE_SUCCESS);
-	}
-
-	@Permission(value = "popular.feelings.stop", tags = "停止舆情监控")
-	@PostMapping("/stopSpider")
-	public Result stopSpider(@RequestBody List<Long> ids) {
-		List<PopularFeelings> listData = this.popularFeelingsService.listByIds(ids);
-		for (PopularFeelings popularFeelings : listData) {
-//            this.popularFeelingsService.stop(popularFeelings);
-		}
-		return Result.instance(Result.Code.MESSAGE_SUCCESS);
-	}
-
-	@Permission(value = "popular.feelings.find", tags = "查询舆情监控详情")
-	@PostMapping("/findDetailPage")
-	public Result findDetailPage(@RequestBody QueryPopularFeelingsPageParam param) {
-
-		QueryWrapper<PopularFeelingsPage> wrapper = new QueryWrapper<>();
-		wrapper.select("id", "popular_feelings_id", "url", "title", "keywords", "description", "spider_time", "update_time");
-		wrapper.eq("popular_feelings_id", param.getPopularFeelingsId());
-
-		Page<PopularFeelingsPage> page = param.getPage();
-		page.addOrder(OrderItem.desc("id"));
-
-		Page<PopularFeelingsPage> resultData = this.popularFeelingsPageService.page(page, wrapper);
-
-		return Result.instance(Result.Code.SUCCESS).setData(resultData);
-	}
-}

+ 0 - 14
src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsPageParam.java

@@ -1,14 +0,0 @@
-package com.zhiqiyun.open.mvc.params.statistics;
-
-import com.zhiqiyun.open.mvc.params.QueryPageParams;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-
-/**
- * @author jtoms
- */
-@EqualsAndHashCode(callSuper = true)
-@Data
-public class QueryPopularFeelingsPageParam extends QueryPageParams {
-    private Long popularFeelingsId;
-}

+ 0 - 16
src/main/java/com/zhiqiyun/open/mvc/params/statistics/QueryPopularFeelingsParam.java

@@ -1,16 +0,0 @@
-package com.zhiqiyun.open.mvc.params.statistics;
-
-import com.zhiqiyun.open.mvc.params.QueryPageParams;
-import lombok.Data;
-import lombok.EqualsAndHashCode;
-
-/**
- * @author jtoms
- */
-@EqualsAndHashCode(callSuper = true)
-@Data
-public class QueryPopularFeelingsParam extends QueryPageParams {
-    private String title;
-    private String keywords;
-    private String siteUrl;
-}

+ 0 - 24
src/main/java/com/zhiqiyun/open/mvc/params/statistics/SavePopularFeelingsParam.java

@@ -1,24 +0,0 @@
-package com.zhiqiyun.open.mvc.params.statistics;
-
-import lombok.Data;
-
-import javax.validation.constraints.NotBlank;
-import javax.validation.constraints.NotEmpty;
-import java.util.List;
-
-/**
- * @author jtoms
- */
-@Data
-public class SavePopularFeelingsParam {
-    @NotBlank(message = "网站名称不能为空")
-    private String siteName;
-    @NotBlank(message = "监控舆论词不能为空")
-    private String keywords;
-    @NotBlank(message = "监控域名不能为空")
-    private String domain;
-
-    private String urlPatterns;
-    @NotEmpty(message = "至少输入一个启动地址")
-    private List<String> startUrls;
-}

+ 2 - 2
src/main/java/com/zhiqiyun/open/router/apis/SentimentSpiderApi.java

@@ -130,8 +130,8 @@ public class SentimentSpiderApi {
 	@ServiceMethod(method = "sentiment.spider.event.stopSpider", title = "停止舆情事件采集")
 	public OapResponse stopSpider(IdsRequest request) {
 		List<SentimentSpiderEvent> listData = this.sentimentSpiderEventService.listByIds(request.getIds());
-		for (SentimentSpiderEvent popularFeelings : listData) {
-			this.sentimentSpiderEventService.stop(popularFeelings);
+		for (SentimentSpiderEvent event : listData) { // stop every event loaded for the requested ids, one at a time
+			this.sentimentSpiderEventService.stop(event);
 		}
 		return OapResponse.success();
 	}

+ 2 - 0
src/main/resources/application.properties

@@ -41,3 +41,5 @@ uploader.file-host=http://127.0.0.1:9800/src
 uploader.allow-file-types=jpg,jpeg,png,gif
 uploader.save-path=/tmp/uploads
 
+####################### spider config (phantomjs path is machine-specific; override per environment) ###############################
+spider.phantomjs_executable_path_property=E:/software/phantomjs-2.1.1/bin/phantomjs.exe

+ 1 - 0
src/main/resources/logback-spring.xml

@@ -19,6 +19,7 @@
     <logger name="org.ehcache" level="INFO" />
     <logger name="org.apache.http" level="INFO" />
     <logger name="us.codecraft.webmagic" level="INFO" />
+    <logger name="org.asynchttpclient.netty.channel" level="INFO" />
 
     <property name="LOG_PATH" value="${LOGGER_ROOT_PATH}/${SPRING_APP_NAME}" />
     <property name="MAX_HISTORY" value="10" />

+ 49 - 45
src/test/java/com/zhiqiyun/SimplePageProcessor.java

@@ -3,70 +3,74 @@ package com.zhiqiyun;
 import com.alibaba.fastjson.JSON;
 import com.zhiqiyun.open.core.models.sentiment.SentimentSpiderSiteRule;
 import com.zhiqiyun.open.core.service.impl.SentimentSpiderEventServiceImpl;
+import com.zhiqiyun.open.core.spiders.OkHttpDownloader;
 import lombok.extern.slf4j.Slf4j;
+import okhttp3.OkHttpClient;
 import org.apache.commons.codec.digest.DigestUtils;
-import us.codecraft.webmagic.ResultItems;
 import us.codecraft.webmagic.Spider;
-import us.codecraft.webmagic.Task;
 import us.codecraft.webmagic.pipeline.Pipeline;
 
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 
 @Slf4j
 public class SimplePageProcessor {
-	public static void main(String[] args) {
-		SentimentSpiderSiteRule rule = new SentimentSpiderSiteRule();
-		rule.setStartUrls("https://www.cqn.com.cn/");
-		rule.setUrlPatterns("https://www.cqn.com.cn/([a-zA-Z]+)/content/([0-9]+)-([0-9]+)/([0-9]+)/content_([0-9]+).htm");
-		rule.setXpath("/html/body/div[4]/div[1]/div[3]/div[1]");
+    public static void main(String[] args) { // manual harness: crawls one hard-coded site rule and logs, per page, how it matches the keywords
+        SentimentSpiderSiteRule rule = new SentimentSpiderSiteRule();
+        rule.setStartUrls("https://www.188420.com/");
+        rule.setUrlPatterns("https://www.188420.com/a/([0-9]+).html");
+        rule.setXpath("/html/body/div[3]/div[1]/div[1]");
 
-		String[] startUrlArray = rule.getStartUrls().split("\n");
+        String[] startUrlArray = rule.getStartUrls().split("\n"); // start URLs are newline-separated in the rule
 
-		Spider spider = Spider.create(new SentimentSpiderEventServiceImpl.SpiderEventProcessor(rule));
+        Spider spider = Spider.create(new SentimentSpiderEventServiceImpl.SpiderEventProcessor(rule));
 
 
-		List<Pipeline> pipelines = new ArrayList<>();
-		pipelines.add((resultItems, task) -> {
-			Map<String, Object> dataMap = resultItems.getAll();
+        List<Pipeline> pipelines = new ArrayList<>();
+        pipelines.add((resultItems, task) -> {
+            Map<String, Object> dataMap = resultItems.getAll();
 
-			String url = resultItems.getRequest().getUrl();
-			String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", ""));
+            String url = resultItems.getRequest().getUrl();
+            String id = DigestUtils.md5Hex(url.replace("http://", "").replace("https://", "")); // MD5 hex of the URL with the scheme prefix removed
 
-			String title = dataMap.getOrDefault("title", "").toString();
-			String keywords = dataMap.getOrDefault("keywords", "").toString();
-			String description = dataMap.getOrDefault("description", "").toString();
-			String bodyText = dataMap.getOrDefault("bodyText", "").toString();
-			String html = dataMap.getOrDefault("html", "").toString();
-			List<String> listFragments = JSON.parseArray(dataMap.getOrDefault("listFragments", "[]").toString(), String.class);
+            String title = dataMap.getOrDefault("title", "").toString();
+            String keywords = dataMap.getOrDefault("keywords", "").toString();
+            String description = dataMap.getOrDefault("description", "").toString();
+            String bodyText = dataMap.getOrDefault("bodyText", "").toString();
+            String html = dataMap.getOrDefault("html", "").toString(); // NOTE(review): id, title, keywords, description and html are computed but never used below
+            List<String> listFragments = JSON.parseArray(dataMap.getOrDefault("listFragments", "[]").toString(), String.class);
 
-			String[] listKeywords = new String[]{"十三届五次会", "习近平"};
+            String[] listKeywords = new String[]{"十三届五次会", "习近平"}; // keywords each page is checked against
 
-			boolean flag = true;
-			for (String keyword : listKeywords) {
-				if (!bodyText.contains(keyword)) {
-					flag = false;
-					break;
-				}
-			}
+            boolean flag = true; // stays true only when bodyText contains every keyword
+            for (String keyword : listKeywords) {
+                if (!bodyText.contains(keyword)) {
+                    flag = false;
+                    break;
+                }
+            }
 
-			listFragments.removeIf(o -> {
-				boolean hasContains = false;
-				for (String keyword : listKeywords) {
-					if (o.contains(keyword)) {
-						hasContains = true;
-						break;
-					}
-				}
-				return !hasContains;
-			});
+            listFragments.removeIf(o -> { // keep only fragments that contain at least one keyword
+                boolean hasContains = false;
+                for (String keyword : listKeywords) {
+                    if (o.contains(keyword)) {
+                        hasContains = true;
+                        break;
+                    }
+                }
+                return !hasContains;
+            });
 
-			log.info("{}>>>>>>>>{}>>>>>>>>{}", flag, listFragments.size(), bodyText);
-		});
-		spider.setPipelines(pipelines);
-		spider.setExitWhenComplete(true);
-		spider.addUrl(startUrlArray);
-		spider.runAsync();
-	}
+            log.info("{}>>>>>>>>{}>>>>>>>>{}>>>>>>>>{}", url, flag, listFragments.size(), bodyText);
+        });
+        spider.setPipelines(pipelines);
+        spider.setExitWhenComplete(true);
+        spider.addUrl(startUrlArray);
+        OkHttpClient.Builder builder = new OkHttpClient.Builder(); // NOTE(review): this builder is never passed to the downloader created below
+        builder.connectTimeout(30, TimeUnit.SECONDS); // NOTE(review): so this 30 s connect timeout appears to have no effect — confirm against OkHttpDownloader's constructors
+        spider.setDownloader(new OkHttpDownloader());
+        spider.runAsync();
+    }
 }