This repository was archived by the owner on Aug 13, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSpiderCore.php
More file actions
137 lines (127 loc) · 4.21 KB
/
SpiderCore.php
File metadata and controls
137 lines (127 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
<?php
/**
* Created by PhpStorm.
* User: jinyichen
* Date: 2018/5/30
* Time: 下午5:55
*/
/**
 * Small helper used by the spider: thin wrappers around cURL for fetching
 * pages / logging in with cookie files, plus helpers for reading and writing
 * (optionally serialized) data files.
 */
class SpiderCore
{
    // ---------------------------------------------------------------------
    // cURL operations
    // ---------------------------------------------------------------------

    /**
     * Request headers sent by get_content() and CurlBcyToken().
     *
     * Each entry is a raw "Name:value" header line as expected by
     * CURLOPT_HTTPHEADER.
     *
     * @var array
     */
    public $httpHeader = [
        "Host:bcy.net",
        "Connection: keep-alive",
        "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Upgrade-Insecure-Requests: 1",
        "DNT:1",
        "Accept-Language:zh-CN,zh;q=0.8",
        "User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
    ];

    /**
     * Fetch a URL with a GET request, sending the cookies stored in a cookie file.
     *
     * @param string $url    URL to fetch.
     * @param string $cookie Path of the cookie file to read cookies from.
     *
     * @return mixed The response body as a string, or false when curl_exec() fails.
     */
    public function get_content($url, $cookie)
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => 0,                 // do not include response headers in the output
            CURLOPT_RETURNTRANSFER => true,              // return the body as a string instead of printing it
            CURLOPT_COOKIEFILE     => $cookie,           // read cookies from this file
            CURLOPT_HTTPHEADER     => $this->httpHeader, // browser-like request headers
        ]);

        $body = curl_exec($ch);
        curl_close($ch);

        return $body;
    }

    /**
     * Submit a login form with a POST request and store the received cookies.
     *
     * The raw response is printed with print_r(); nothing is returned.
     *
     * @param string $url       URL the login form is submitted to.
     * @param string $cookie    Path of the file the received cookies are saved to.
     * @param array  $post      Form fields; encoded with http_build_query().
     * @param mixed  $oldCookie Currently unused; kept so existing callers keep working.
     *
     * @return void
     */
    public function login_post($url, $cookie, $post, $oldCookie)
    {
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => 0,       // do not include response headers in the output
            CURLOPT_RETURNTRANSFER => true,    // return the body as a string instead of printing it
            CURLOPT_COOKIEJAR      => $cookie, // save received cookies into this file
            CURLOPT_POST           => 1,       // send as POST
            CURLOPT_POSTFIELDS     => http_build_query($post),
        ]);

        $result = curl_exec($curl);
        print_r($result);
        curl_close($curl);
    }

    /**
     * Request a URL with GET and save the cookies it sets into a cookie file.
     *
     * If the response is empty (or the request fails) a message is printed and
     * the script is terminated.
     *
     * @param string $url    URL to request.
     * @param string $cookie Path of the file the received cookies are saved to.
     *
     * @return void
     */
    public function CurlBcyToken($url, $cookie)
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_HTTPHEADER     => $this->httpHeader, // browser-like request headers
            CURLOPT_COOKIEJAR      => $cookie,           // save received cookies into this file
            CURLOPT_RETURNTRANSFER => true,              // return the body as a string instead of printing it
        ]);

        $response = curl_exec($ch);
        // Release the handle before a possible die() so it is closed on every path.
        curl_close($ch);

        if (empty($response)) {
            print_r('无法连接半次元');
            die();
        }
    }

    // ---------------------------------------------------------------------
    // File operations
    // ---------------------------------------------------------------------

    /**
     * Read a file, optionally unserializing its contents.
     *
     * @param string $filename  Path of the file to read.
     * @param bool   $serialize When truthy, the contents are passed through unserialize().
     *
     * @return bool|mixed|string The (unserialized) contents; false when the file
     *                           does not exist or cannot be read.
     */
    public function getFile($filename, $serialize)
    {
        if (!file_exists($filename)) {
            return false;
        }

        $raw = file_get_contents($filename);
        if ($raw === false || !$serialize) {
            return $raw;
        }

        // NOTE(review): unserialize() must only see data written by this
        // application; it is unsafe on files an untrusted party can modify.
        return unserialize($raw);
    }

    /**
     * Serialize a value and write it to a file.
     *
     * @param string $filename Path of the file to write.
     * @param mixed  $content  Value to serialize and store.
     * @param bool   $end      When truthy, append to the file instead of overwriting it.
     *
     * @return void
     */
    public function serializeFile($filename, $content, $end)
    {
        // Must be called on $this: newFile() is a method, not a global function.
        $this->newFile($filename, serialize($content), $end);
    }

    /**
     * Write content to a file, creating the file when it does not exist.
     *
     * @param string $filename Path of the file to write.
     * @param string $content  Data to write.
     * @param bool   $end      When truthy, append to the file instead of overwriting it.
     *
     * @return void
     */
    public function newFile($filename, $content, $end)
    {
        // file_put_contents() creates a missing file itself, so no touch() is needed.
        file_put_contents($filename, $content, $end ? FILE_APPEND : 0);
    }
}