首页 技术 正文
技术 2022年11月20日
0 收藏 659 点赞 3,622 浏览 2989 个字

* 通过composer下载

composer require owner888/phpspider

// composer.json

{
"require": {
"owner888/phpspider": "^2.1"
}
}

  

* 去掉讨厌的注释

https://doc.phpspider.org/demo-start.html

 打开 ./vendor/owner888/phpspider/core/phpspider.php,找到下面这段代码:

/* Do NOT delete this comment */
        // Easter egg: refuse to run unless the checked file still carries both marker comments.
// get_included_files() lists the files loaded so far; only the first entry is read
// (normally the script PHP was started with -- NOTE(review): confirm for your setup).
$included_files = get_included_files();
$content = file_get_contents($included_files[0]);
// Abort when either the English or the Chinese marker comment is missing from that file.
if (!preg_match("#/\* Do NOT delete this comment \*/#", $content) || !preg_match("#/\* 不要删除这段注释 \*/#", $content))
{
// The logged message gives no hint about which marker is missing.
$msg = "Unknown error...";
log::error($msg);
exit;
}

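 删掉上面这段检查代码即可。注意:vendor 目录下的改动会在重新安装或更新该依赖时被覆盖;如果不想修改 vendor,也可以在入口文件 index.php 中保留 `/* Do NOT delete this comment */` 和 `/* 不要删除这段注释 */` 这两行注释,检查同样可以通过。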

* 导入数据库文件

cd ./vendor/owner888/phpspider/demo

  

mysql -uroot -hlocalhost -p

  

create database demo charset utf8 collate utf8_general_ci;
\. qiushibaike.sql

  

# ************************************************************
# Sequel Pro SQL dump
# Version 4541
#
# http://www.sequelpro.com/
# https://github.com/sequelpro/sequelpro
#
# Host: 127.0.0.1 (MySQL 5.7.14)
# Database: demo
# Generation Time: 2016-10-20 16:55:11 +0000
# ************************************************************


# Save the current session settings so they can be restored at the end of the script.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;


# Dump of table content
# ------------------------------------------------------------

# Any existing `content` table is dropped and recreated empty.
DROP TABLE IF EXISTS `content`;

CREATE TABLE `content` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `depth` int(11) DEFAULT NULL,
  `url` varchar(200) DEFAULT NULL,
  `article_title` varchar(20) DEFAULT NULL,
  `article_headimg` varchar(150) DEFAULT NULL,
  `article_author` varchar(20) DEFAULT NULL,
  `article_content` text,
  `article_publish_time` int(10) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


# Restore the session settings saved at the top of the script.
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;

* 创建./index.php

<?php

// Demo crawler entry point: configures phpspider for qiushibaike.com and
// writes the extracted fields into the `content` table of the `demo` database.

// Resolve the autoloader relative to this file so the script also works when
// it is started from a different working directory.
require __DIR__ . '/vendor/autoload.php';

use phpspider\core\phpspider;

$configs = [
    // Name of this crawl job.
    'name' => '糗事百科',
    // Host names listed for this crawl.
    'domains' => [
        'qiushibaike.com',
        'www.qiushibaike.com',
    ],
    // Entry URLs the crawl starts from.
    'scan_urls' => [
        'http://www.qiushibaike.com/',
    ],
    // URL patterns identifying content (article) pages.
    'content_url_regexes' => [
        'http://www.qiushibaike.com/article/\d+',
    ],
    // URL patterns identifying list pages.
    'list_url_regexes' => [
        'http://www.qiushibaike.com/8hr/page/\d+\?s=\d+',
    ],
    // Each field pairs a name with the selector used to extract it.
    'fields' => [
        [
            // Extract the article body from the content page.
            'name' => 'article_content',
            'selector' => "//*[@id='single-next-link']",
            'required' => true,
        ],
        [
            // Extract the article author from the content page.
            'name' => 'article_author',
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ],
    ],
    'log_show' => true,
    'input_encoding' => 'utf-8',
    'output_encoding' => 'utf-8',
    // Connection settings for the `demo` database created from qiushibaike.sql.
    'db_config' => [
        'host' => '127.0.0.1',
        'user' => 'root',
        'pass' => '',
        'name' => 'demo',
        'port' => 3306,
    ],
    // Alternative export target: write SQL statements to a file instead of the
    // database. To use it, replace the 'export' entry below with this one.
    /*
    'export' => [
        'type' => 'sql',
        'file' => './data/sql/qiushibaike.sql'
    ]
    */
    // Store the extracted fields in the `content` table.
    'export' => [
        'type' => 'db',
        'table' => 'content',
    ],
];

$spider = new phpspider($configs);
$spider->start();

  

* Run

php ./index.php

  

相关推荐
python开发_常用的python模块及安装方法
adodb:我们领导推荐的数据库连接组件bsddb3:BerkeleyDB的连接组件Cheetah-1.0:我比较喜欢这个版本的cheeta…
日期:2022-11-24 点赞:878 阅读:9,085
Educational Codeforces Round 11 C. Hard Process 二分
C. Hard Process题目连接:http://www.codeforces.com/contest/660/problem/CDes…
日期:2022-11-24 点赞:807 阅读:5,560
下载Ubuntn 17.04 内核源代码
zengkefu@server1:/usr/src$ uname -aLinux server1 4.10.0-19-generic #21…
日期:2022-11-24 点赞:569 阅读:6,409
可用Active Desktop Calendar V7.86 注册码序列号
可用Active Desktop Calendar V7.86 注册码序列号Name: www.greendown.cn Code: &nb…
日期:2022-11-24 点赞:733 阅读:6,182
Android调用系统相机、自定义相机、处理大图片
Android调用系统相机和自定义相机实例本博文主要是介绍了android上使用相机进行拍照并显示的两种方式,并且由于涉及到要把拍到的照片显…
日期:2022-11-24 点赞:512 阅读:7,819
Struts的使用
一、Struts2的获取  Struts的官方网站为:http://struts.apache.org/  下载完Struts2的jar包,…
日期:2022-11-24 点赞:671 阅读:4,902