1. git clone proxy_pool
$ git clone git@github.com:jhao104/proxy_pool.git
2. 解决github clone报错Connection refused
https://ask.hellobi.com/blog/safa/36775
3. 安装python3的依赖包
$ env364
$ pip install -r requirements.txt
4. 安装redis数据库
$ sudo yum install epel-release # 添加epel库
$ sudo yum update # 更新yum源
$ sudo yum -y install redis # 安装redis数据库
$ sudo systemctl start redis # 启动redis服务
$ redis-cli # 验证redis安装,进入命令行模式
修改redis密码设置:passwd01为自定义的密码(setting.py与/etc/redis.conf中的密码必须一致;DB_CONN中的端口须与redis.conf中的port一致,redis默认端口为6379)
$ vi setting.py
DB_CONN = 'redis://:passwd01@127.0.0.1:8888/0'
:wq
$ sudo vi /etc/redis.conf
requirepass passwd01
:wq
$ sudo systemctl restart redis # 重启redis服务
redis服务常用管理命令(仅供参考,不需要依次执行):
$ systemctl start redis.service # 启动redis服务器
$ systemctl stop redis.service # 停止redis服务器
$ systemctl restart redis.service # 重新启动redis服务器
$ systemctl status redis.service # 获取redis服务器的运行状态
$ systemctl enable redis.service # 开机启动redis服务器
$ systemctl disable redis.service # 开机禁用redis服务器
5. 启动程序
$ python proxyPool.py schedule # 启动调度程序
$ python proxyPool.py server # 启动webApi服务
访问 http://127.0.0.1:5010/get 可随机获取一个代理
6. python程序
import requests
def get_proxy():
    """Fetch one record from the local proxy-pool web API.

    Sends a GET request to http://127.0.0.1:5010/get/ and returns the
    decoded JSON body. getHtml() in this file reads the "proxy" key
    from the returned object.

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached
            or does not answer within the timeout.
    """
    # A timeout keeps the call from blocking forever when the API is down.
    return requests.get("http://127.0.0.1:5010/get/", timeout=10).json()
def delete_proxy(proxy):
    """Ask the local proxy-pool web API to remove ``proxy`` from the pool.

    Args:
        proxy: value placed in the ``proxy`` query parameter of the
            /delete/ endpoint (presumably "host:port" -- confirm against
            the API).

    Raises:
        requests.exceptions.RequestException: if the API cannot be reached
            or does not answer within the timeout.
    """
    # A timeout keeps the call from blocking forever when the API is down.
    requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy), timeout=10)
def getHtml():
    """Fetch http://www.example.com through a proxy taken from the pool.

    One proxy is requested from the pool and the download is attempted up
    to five times through it. If every attempt fails, the proxy is removed
    from the pool.

    Returns:
        The ``requests`` response object on success, or None when the pool
        returned no proxy or all attempts failed.
    """
    retry_count = 5
    proxy = get_proxy().get("proxy")
    if not proxy:
        # The pool answered without a "proxy" value; do not send traffic
        # through "http://None" or try to delete a non-existent entry.
        return None
    url = 'http://www.example.com'
    while retry_count > 0:
        try:
            # Access the target through the proxy; the timeout prevents a
            # dead proxy from hanging the call indefinitely.
            html = requests.get(
                url,
                proxies={"http": "http://{}".format(proxy)},
                timeout=10,
            )
            return html
        except Exception:
            retry_count -= 1
    # Every retry failed: remove the proxy from the pool.
    delete_proxy(proxy)
    return None
新安装(使用2.3.0版本重新安装的步骤)
wget https://github.com/jhao104/proxy_pool/archive/refs/tags/2.3.0.zip
unzip 2.3.0.zip
cd proxy_pool-2.3.0
pyenv activate env374
pip install -r requirements.txt
vi setting.py
DB_CONN = 'redis://:password01@127.0.0.1:8888/0'
:wq
sudo yum install epel-release # 添加epel库
sudo yum update # 更新yum源
sudo yum -y install redis # 安装redis数据库
sudo systemctl start redis # 启动redis服务
sudo vi /etc/redis.conf
requirepass password01
:wq
sudo systemctl restart redis # 重启redis服务
./start.sh