* 通过composer下载
composer require owner888/phpspider
// composer.json
{
"require": {
"owner888/phpspider": "^2.1"
}
}
* 去掉讨厌的注释
https://doc.phpspider.org/demo-start.html
./vendor/owner888/phpspider/core/phpspider.php
/* Do NOT delete this comment */
// Easter egg (translated from the upstream Chinese comment).
// Self-check: loads the text of the first file reported by get_included_files()
// and requires it to contain BOTH marker comments (the English and the Chinese one).
// NOTE(review): per the PHP manual the first entry is the originally invoked script - confirm.
$included_files = get_included_files();
$content = file_get_contents($included_files[0]);
// If either marker is missing, the message is passed to log::error() and the script exits.
if (!preg_match("#/\* Do NOT delete this comment \*/#", $content) || !preg_match("#/\* 不要删除这段注释 \*/#", $content))
{
$msg = "Unknown error...";
log::error($msg);
exit;
}
删掉 ./vendor/owner888/phpspider/core/phpspider.php 中的上面这段检查代码,之后入口文件里就不必再保留那两行注释了
* 导入数据库文件
cd ./vendor/owner888/phpspider/demo
mysql -uroot -hlocalhost -p
create database demo charset utf8 collate utf8_general_ci;
\. qiushibaike.sql
# ************************************************************
# Sequel Pro SQL dump
# Version 4541
#
# http://www.sequelpro.com/
# https://github.com/sequelpro/sequelpro
#
# Host: 127.0.0.1 (MySQL 5.7.14)
# Database: demo
# Generation Time: 2016-10-20 16:55:11 +0000
# ************************************************************

# Save the session settings that this dump overrides; they are restored at the end.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;


# Dump of table content
# ------------------------------------------------------------

# Each statement must start on its own line: anything that follows `#` on the
# same line is a comment and would be silently skipped by the mysql client.
DROP TABLE IF EXISTS `content`;

# Target table of the demo crawler (index.php exports with 'table' => 'content').
# The article_content / article_author columns match the field names configured there.
# NOTE(review): depth and url are presumably filled in by phpspider itself - verify.
CREATE TABLE `content` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `depth` int(11) DEFAULT NULL,
  `url` varchar(200) DEFAULT NULL,
  `article_title` varchar(20) DEFAULT NULL,
  `article_headimg` varchar(150) DEFAULT NULL,
  `article_author` varchar(20) DEFAULT NULL,
  `article_content` text,
  `article_publish_time` int(10) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


# Restore the session settings saved at the top of the dump.
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
* 创建./index.php
<?php
// Demo entry script: configures a phpspider crawl of qiushibaike.com and
// exports the extracted fields into the `content` table of the `demo` database.
require './vendor/autoload.php';

use phpspider\core\phpspider;

$configs = [
    'name' => '糗事百科',

    // Hosts listed for the crawl.
    'domains' => [
        'qiushibaike.com',
        'www.qiushibaike.com',
    ],

    // Entry URL(s) for the crawl.
    'scan_urls' => [
        'http://www.qiushibaike.com/',
    ],

    // URL pattern for article (content) pages.
    'content_url_regexes' => [
        'http://www.qiushibaike.com/article/\d+',
    ],

    // URL pattern for paginated list pages.
    'list_url_regexes' => [
        'http://www.qiushibaike.com/8hr/page/\d+\?s=\d+',
    ],

    // Fields to extract; each selector is an XPath expression.
    'fields' => [
        [
            // Extract the article body from the content page.
            'name' => 'article_content',
            'selector' => "//*[@id='single-next-link']",
            'required' => true,
        ],
        [
            // Extract the article author from the content page.
            'name' => 'article_author',
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ],
    ],

    'log_show' => true,
    'input_encoding' => 'utf-8',
    'output_encoding' => 'utf-8',

    // Connection settings for the MySQL database created in the import step.
    'db_config' => [
        'host' => '127.0.0.1',
        'user' => 'root',
        'pass' => '',
        'name' => 'demo',
        'port' => 3306,
    ],

    // Alternative export target (disabled): write SQL statements to a file
    // instead of inserting into the database.
    /*
    'export' => [
        'type' => 'sql',
        'file' => './data/sql/qiushibaike.sql'
    ]
    */

    // Active export target: insert rows into the `content` table.
    'export' => [
        'type' => 'db',
        'table' => 'content',
    ],
];

$spider = new phpspider($configs);
$spider->start();
* Run
php ./index.php