UUBlog

Python基于Scrapy框架写个麻雀虽小五脏俱全的爬虫

Posted on 2017-03-28 Edited on 2025-06-11 In python

这个东西写有半个月了，最近工作忙没空理会，看了下数据，跑得还不错。

用各种框架和开源项目配合，站在巨人的肩膀上，轻轻松松完成一个麻雀虽小五脏俱全的爬虫。

采集URL，大规模URL去重，分类，入库，反爬虫。而完成这些，只需要寥寥不到三百行代码。

Scrapy工程目录如下

./
├── crawls # 开启持久化会产生一些记录文件
├── sbdspider
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── items.py     # 定义要采集的字段
│   ├── items.pyc
│   ├── middlewares  # 中间件 主要是随机选择 UserAgent和代理IP 主要用来反爬虫
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── RandomProxy.py
│   │   ├── RandomProxy.pyc
│   │   ├── RandomUserAgent.py
│   │   └── RandomUserAgent.pyc
│   ├── middlewares.py
│   ├── pipelines.py  # 入库MySQL
│   ├── pipelines.pyc
│   ├── scrapy_redis  # 用的九茶的模块 用Bloomfilter+redis去重
│   │   ├── BloomfilterOnRedis.py
│   │   ├── BloomfilterOnRedis.pyc
│   │   ├── connection.py
│   │   ├── connection.pyc
│   │   ├── dupefilter.py
│   │   ├── dupefilter.pyc
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── isExists.py
│   │   ├── pipelines.py
│   │   ├── queue.py
│   │   ├── queue.pyc
│   │   ├── scheduler.py
│   │   ├── scheduler.pyc
│   │   ├── spiders.py
│   │   ├── spiders.pyc
│   │   └── tests.py
│   ├── settings.py  # 配置 pipeline、middlewares的引用声明主要在这里
│   ├── settings.pyc
│   └── spiders
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── sobaidupan.py # 爬虫主体 主要是提取数据 分类
│       └── sobaidupan.pyc
└── scrapy.cfg

首先是字段的定义，我需要保存哪些信息

import scrapy

class SbdspiderItem(scrapy.Item):
    """Fields collected for one shared net-disk resource and the user who shared it."""
    # define the fields for your item here like:
    # name = scrapy.Field()
    tid = scrapy.Field()    # net-disk type ID
    cid = scrapy.Field()    # resource category ID
    uid = scrapy.Field()    # resource user ID
    name = scrapy.Field()   # user name of the sharing user
    avatar = scrapy.Field() # avatar image URL of the sharing user
    title = scrapy.Field()  # resource title
    size = scrapy.Field()   # resource size
    url = scrapy.Field()    # resource URL
    pwd = scrapy.Field()    # resource password
    description = scrapy.Field() # resource description
    available = scrapy.Field()   # whether the resource is available
    sharetime = scrapy.Field()   # share time

然后我设计了这样的数据库表来保存它们：

-- phpMyAdmin SQL Dump
-- version 4.6.6
-- https://www.phpmyadmin.net/
--
-- Host: localhost
-- Generation Time: 2017-03-10 05:44:53
-- Server version: 5.5.53-log
-- PHP Version: 5.5.38

SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
SET time_zone = "+00:00";


/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8mb4 */;

--
-- Database: `yzy_data`
--

-- --------------------------------------------------------

--
-- Table structure for table `yzy_class` (resource categories)
--

CREATE TABLE `yzy_class` (
  `id` int(11) NOT NULL,
  `cname` varchar(10) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

--
-- Dumping data for table `yzy_class`
--
-- Resources only store the category ID, which avoids repeating the category
-- name in every row, saves space and keeps indexing practical as data grows.
INSERT INTO `yzy_class` (`id`, `cname`) VALUES
(1, '其它'),
(2, '音乐'),
(3, '图片'),
(4, '电子书'),
(5, '文档'),
(6, '种子'),
(7, '手机APP'),
(8, '影视'),
(9, '无损音乐'),
(10, '教程');

-- --------------------------------------------------------

--
-- 表的结构 `yzy_resources`
--
-- This is the main table: almost every field defined in items.py ends up here.
-- `sharetime` is included because the pipeline's INSERT statement writes it;
-- without the column that INSERT fails with "Unknown column 'sharetime'".
CREATE TABLE `yzy_resources` (
  `id` int(11) NOT NULL,
  `tid` tinyint(3) UNSIGNED NOT NULL,
  `cid` tinyint(3) UNSIGNED NOT NULL,
  `uid` int(11) NOT NULL,
  `title` varchar(80) NOT NULL,
  `size` varchar(10) NOT NULL,
  `url` varchar(255) NOT NULL,
  `pwd` varchar(10) NOT NULL,
  `description` varchar(100) NOT NULL,
  `available` tinyint(1) NOT NULL,
  `sharetime` datetime NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

-- --------------------------------------------------------

--
-- Table structure for table `yzy_type`
--
-- Keeps the net-disk types in their own table so that resources from
-- additional kinds of net disks can be collected later.
CREATE TABLE `yzy_type` (
  `id` int(11) NOT NULL,
  `name` char(10) NOT NULL,
  `ename` char(10) NOT NULL,
  `shortname` char(4) NOT NULL,
  `url` varchar(255) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

--
-- Dumping data for table `yzy_type`
--
-- The URL scheme separator is "://"; the original dump had "https:/".
INSERT INTO `yzy_type` (`id`, `name`, `ename`, `shortname`, `url`) VALUES
(1, '百度网盘', 'dupan', '度盘', 'https://pan.baidu.com/');

-- --------------------------------------------------------

--
-- Table structure for table `yzy_users`
--
-- Stores the net-disk users who shared the resources.
CREATE TABLE `yzy_users` (
  `id` int(11) NOT NULL,
  `tid` tinyint(4) NOT NULL,
  `uid` varchar(20) NOT NULL,
  `uname` varchar(20) NOT NULL,
  `avatar` varchar(255) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

--
-- Indexes for dumped tables
--

--
-- Indexes for table `yzy_class`
--
ALTER TABLE `yzy_class`
  ADD PRIMARY KEY (`id`);

--
-- Indexes for table `yzy_resources`
--
ALTER TABLE `yzy_resources`
  ADD PRIMARY KEY (`id`);

--
-- Indexes for table `yzy_type`
--
ALTER TABLE `yzy_type`
  ADD PRIMARY KEY (`id`);

--
-- Indexes for table `yzy_users`
--
ALTER TABLE `yzy_users`
  ADD PRIMARY KEY (`id`);

--
-- AUTO_INCREMENT for dumped tables
--

--
-- AUTO_INCREMENT for table `yzy_class`
--
ALTER TABLE `yzy_class`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=11;
--
-- AUTO_INCREMENT for table `yzy_resources`
--
ALTER TABLE `yzy_resources`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `yzy_type`
--
ALTER TABLE `yzy_type`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=2;
--
-- AUTO_INCREMENT for table `yzy_users`
--
ALTER TABLE `yzy_users`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;

注意看注释，还是很好理解这些表干嘛用的。

pipelines.py获取到的Items怎么处理

这里有两个类，其实是两种处理方式，一种是默认的，我改了一下，采集到的数据，以JSON的形式保存。

sobaiduPipeline才是重点，主要有两次插入数据，一次是插入用户数据，一次获取到用户ID后，插入到yzy_resources表。

数据库的定义在 settings.py 里面

import json  
import MySQLdb
from scrapy.exceptions import DropItem
import settings

  
class SbdspiderPipeline(object):
    """Item pipeline that appends every item to ``items.jl`` as one JSON line.

    The output file is opened when the pipeline is created and closed in
    ``close_spider`` so that buffered lines are flushed to disk.
    """

    def __init__(self):
        # Binary mode: process_item encodes each line explicitly, which
        # behaves the same on Python 2 and Python 3.
        self.file = open('items.jl', 'wb')

    def process_item(self, item, spider):
        """Serialize ``item`` to a JSON line, write it, and pass the item on.

        :param item: the scraped item (anything ``dict()`` accepts).
        :param spider: the spider that produced the item (unused).
        :returns: the unchanged ``item`` for the next pipeline stage.
        """
        line = json.dumps(dict(item)) + "\n"
        # json.dumps escapes non-ASCII by default, so encoding cannot fail.
        self.file.write(line.encode('utf-8'))
        return item

    def close_spider(self, spider):
        """Close the output file; the original never released the handle."""
        self.file.close()


# 入库到MySQL
class sobaiduPipeline(object):
    """Item pipeline that stores crawled resources in MySQL.

    For every item the sharing user is looked up (or created) in
    ``yzy_users``; the resource is then inserted into ``yzy_resources`` with
    that user's row id.  Connection parameters are read from ``settings``.

    All values are passed to the driver as query parameters instead of being
    formatted into the SQL text, so quotes in scraped titles/descriptions can
    neither break the statement nor inject SQL.

    NOTE: no ``commit()`` is issued here; the tables defined for this project
    use the MyISAM engine.
    """

    def __init__(self):
        """Open the MySQL connection and create the cursor used by all queries."""
        self.conn = MySQLdb.connect(host=settings.MYSQL_HOST,
                                    user=settings.MYSQL_USER,
                                    passwd=settings.MYSQL_PASS,
                                    db=settings.MYSQL_NAME,
                                    charset='utf8',
                                    use_unicode=True)
        # The attribute name is misspelled ("curosr"); it is kept unchanged
        # in case other code refers to it.
        self.curosr = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one resource row and return the item unchanged.

        :param item: item providing the ``SbdspiderItem`` fields.
        :param spider: the spider that produced the item (unused).
        :returns: ``item``, also when the database reports an error (the
            error is printed and the item is passed on).
        """
        try:
            userid = self.insert_user(item['uid'], item['name'], item['avatar'])
            sql = ("INSERT INTO yzy_resources"
                   "(tid,cid,uid,title,size,url,pwd,description,available,sharetime) "
                   "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")
            params = (item['tid'], item['cid'], userid, item['title'],
                      item['size'], item['url'], item['pwd'],
                      item['description'], item['available'],
                      item['sharetime'])
            self.curosr.execute(sql, params)
        except MySQLdb.Error as e:
            print("Error:%d:%s" % (e.args[0], e.args[1]))

        return item

    def insert_user(self, uid, name, pic):
        """Return the ``yzy_users`` row id for ``uid``, creating the row if needed.

        :param uid: the user's id on the net-disk site.
        :param name: the user's display name.
        :param pic: URL of the user's avatar.
        :returns: the row id, or ``0`` when the lookup/insert failed.
        """
        userid = 0
        try:
            self.curosr.execute("SELECT id FROM yzy_users WHERE uid=%s", (uid,))
            row = self.curosr.fetchone()
            if row is not None:
                # Reuse the existing user, even if duplicates slipped in.
                userid = row[0]
            else:
                inserted = self.curosr.execute(
                    "INSERT INTO yzy_users(uid,uname,avatar) VALUES(%s,%s,%s)",
                    (uid, name, pic))
                if inserted == 1:
                    userid = self.curosr.lastrowid
        except MySQLdb.Error as e:
            print("Error:%d:%s" % (e.args[0], e.args[1]))

        return userid

sobaidupan.py 蜘蛛的主体

# -*- coding: utf-8 -*-
from sbdspider.scrapy_redis.spiders import RedisSpider
# 这里我引入的是九茶的模块 RedisSpider
from scrapy.http import Request
from sbdspider.items import SbdspiderItem
import requests
import re
import datetime

class SobaidupanSpider(RedisSpider):
    """Crawl www.sobaidupan.com and emit one ``SbdspiderItem`` per resource page.

    Every downloaded page is checked for resource details (``parse_item``)
    and for further links to follow (``parse``).  Resources are roughly
    classified by file extension / title keyword (``classifyRes``).
    """
    name = "sobaidu"

    # Class-type key maps: lower-case file extensions for each category id
    # returned by classifyRes.
    # NOTE(review): 'acc' looks like a typo for 'aac' -- confirm before changing.
    ckm_music = ('mp3', 'wav', 'mid', 'wma', 'cda', 'acc')                 # id 2
    ckm_picture = ('jpg', 'jpeg', 'png', 'gif', 'psd', 'bmp', 'svg', 'tga')  # id 3
    ckm_ebook = ('txt', 'pdf', 'mobi', 'azw', 'mbp', 'ebx')               # id 4
    ckm_docfile = ('doc', 'docx', 'wps', 'ppt', 'xls', 'xlsx')            # id 5
    # The trailing comma matters: ('torrent') is a plain string, and
    # "x in 'torrent'" is a substring test that even matches ''.
    ckm_torrent = ('torrent',)                                             # id 6
    ckm_app = ('apk', 'ipa', 'sis', 'sisx', 'xap')                         # id 7
    ckm_movie = ('mkv', 'rmvb', 'mp4', 'rm', 'avi', 'wmv', 'asf', 'asx',
                 'mpg', 'mpeg', 'mpe', '3gp', 'flv', 'f4v', 'vob', 'mov')  # id 8
    ckm_apeflac = ('ape', 'flac')                                          # id 9
    # Title keywords (not extensions) that mark a tutorial.
    ckm_teach = (u'教程', u'入门', u'精讲', u'详解', u'课程')                   # id 10

    allowed_domains = ["www.sobaidupan.com"]
    redis_key = "sobaidupan:start_urls"
    start_urls = ['http://www.sobaidupan.com/']

    # Seconds to wait for the intermediate download page fetched in parse_item.
    redirect_timeout = 10

    def start_requests(self):
        """Schedule the start URLs; download failures go to ``errback``."""
        for u in self.start_urls:
            yield Request(u, callback=self.parse,
                          errback=self.errback)

    def parse(self, response):
        """Yield the page's item (if it describes a resource) and every link on it."""
        item = self.parse_item(response)
        if item is not None:
            yield item
        for a in response.css('a::attr(href)').extract():
            if not a:
                continue
            next_url = response.urljoin(a)
            yield Request(next_url, callback=self.parse)

    @staticmethod
    def _first_group(match, default=u''):
        """Return group 1 of a regex match, or ``default`` when nothing matched."""
        return match.group(1) if match is not None else default

    def parse_item(self, response):
        """Extract the resource fields from a page.

        :param response: the downloaded page.
        :returns: a filled ``SbdspiderItem``, or ``None`` when the page has no
            title/download link or the real net-disk URL cannot be resolved.
        """
        text = response.text
        title = re.search(r'<h1>(.+?)</h1>', text)
        res = re.search(r'href="(http://sbdp\.baidudaquan\.com/down\.asp\?id=.+?)"', text)
        if res is None or title is None:
            return None

        # The real net-disk URL is only present on the intermediate download
        # page.  This is a direct ``requests`` call, so it is not routed
        # through Scrapy's downloader; the timeout keeps a dead host from
        # hanging the crawl.
        try:
            ssource = requests.get(res.group(1), timeout=self.redirect_timeout)
        except requests.RequestException as e:
            self.logger.warning("Failed to fetch %s: %s", res.group(1), e)
            return None
        ssource.encoding = 'utf-8'
        resurl = re.search(r"URL=(.+?)'", ssource.text)
        # re.search("URL=(http://pan\.baidu\.com/share/link\?shareid=.+?)'",ssource.text)
        if resurl is None:
            return None

        uid = re.search(r'user-(\d*)-1\.html', text)
        name = re.search(u'<div align="center">用户名：(.+?)</div></td>', text)
        avatar = re.search(r'<img src="(.+?)" width="100" height="100" border="0">', text)
        ressize = re.search(u'<B>资源大小：</B>(.+?)&nbsp;<b>', text)
        description = re.search(u'<B>资源类别：</B>(.+?)</div>', text)
        sharetime = re.search(u'<b>分享日期：</b>(.+?)</div>', text)

        item = SbdspiderItem()
        item['tid'] = 1
        item['cid'] = self.classifyRes(title.group(1))
        # User details are optional: a page without them must not abort the
        # whole item with an AttributeError on ``None.group``.
        item['uid'] = self._first_group(uid)
        item['name'] = self._first_group(name)
        item['avatar'] = self._first_group(avatar)
        item['title'] = title.group(1)
        # Unicode literal: a byte string here cannot be mixed with the other
        # (unicode) field values on Python 2.
        item['size'] = self._first_group(ressize, u'未知')
        item['url'] = resurl.group(1)
        item['pwd'] = ''
        item['description'] = self._first_group(description)
        item['available'] = 1
        shared = self._first_group(sharetime, None)
        if shared is None:
            # No share date on the page: fall back to the crawl time.
            shared = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        item['sharetime'] = shared
        return item

    def classifyRes(self, title):
        """Roughly classify a resource by its title.

        The file extension (case-insensitive) is matched against the ``ckm_*``
        key maps first; if none matches, the whole title is searched for the
        tutorial keywords.

        :param title: resource title, usually a file name.
        :returns: category id, ``1`` ("other") when nothing matches.
        """
        ext = title.rsplit('.', 1)[-1].lower() if '.' in title else ''

        keymaps = (
            (self.ckm_music, 2),
            (self.ckm_picture, 3),
            (self.ckm_ebook, 4),
            (self.ckm_docfile, 5),
            (self.ckm_torrent, 6),
            (self.ckm_app, 7),
            (self.ckm_movie, 8),
            (self.ckm_apeflac, 9),
        )
        for extensions, classid in keymaps:
            if ext in extensions:
                return classid

        for keyword in self.ckm_teach:
            if keyword in title:
                return 10
        return 1

    def errback(self, failure):
        """Log failed start requests instead of silently dropping them."""
        self.logger.warning("Request failed: %r", failure)

多IP代理采集反爬虫 IPProxys+RandomUserAgent

先下载安装这个 IPProxyPool 。

搭建成功后，运行有采集到数据的话，执行 curl 'http://127.0.0.1:8000/?types=0&count=5&country=国内' 可以看到返回json格式的数据（URL 要加引号，否则 shell 会把 & 当成后台运行符）。这样就成功一半了。

主要调用接口和随机切换代码 RandomProxy.py

class RandomProxy(object):
    """Scrapy middleware that sends each request through a randomly chosen proxy.

    The proxy list is fetched once, at construction time, from the local
    IPProxyPool HTTP API.
    """

    def __init__(self):
        # NOTE(review): ``count=`` is empty here while the curl example in the
        # article uses ``count=5`` -- confirm what the API returns for an
        # empty count.
        self.r = requests.get(u'http://127.0.0.1:8000/?types=0&count=&country=国内')
        # Each entry is indexed as [ip, port, ...] by process_request.
        self.ip_ports = json.loads(self.r.text)

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` to a random proxy from the pool.

        :param request: the outgoing request; its ``meta`` dict is modified.
        :param spider: the spider issuing the request (unused).
        :returns: ``None``.
        """
        if not self.ip_ports:
            # Empty pool: random.choice would raise IndexError and fail every
            # request, so send this one without a proxy instead.
            return None
        ip_port = random.choice(self.ip_ports)
        request.meta['proxy'] = "http://%s:%s" % (ip_port[0], ip_port[1])
        return None

最后怎么在服务器上挂机采集，爬虫持久化

nohup scrapy crawl sobaidu -s JOBDIR=crawls/sobaidu-1 1>/dev/null 2>logfile.log &

数据样本，5天28万的数据，主要是代理IP质量不高，否则速度还能上一个台阶，还不是分布式。

MariaDB [yzy_data]> select count(*) from yzy_resources;
+----------+
| count(*) |
+----------+
|   283888 |
+----------+
1 row in set (0.00 sec)

MariaDB [yzy_data]> select * from yzy_resources limit 10 \G;
*************************** 1. row ***************************
         id: 1
        tid: 1
        cid: 1
        uid: 0
      title: 《帝王师刘伯温[精品]》.epub
       size: 2.65 MB
        url: http://pan.baidu.com/share/link?shareid=2676964745&uk=4194551491&fid=646837282049387
        pwd: 
description: 人物传记
  available: 1
  sharetime: 2015-11-17 04:59:00
*************************** 2. row ***************************
         id: 2
        tid: 1
        cid: 4
        uid: 0
      title: 余念.txt
       size: 338.77 KB
        url: http://pan.baidu.com/s/1jIRZs7W
        pwd: 
description: /
  available: 1
  sharetime: 2017-02-14 07:37:00
*************************** 3. row ***************************
         id: 3
        tid: 1
        cid: 4
        uid: 0
      title: 《千山记》石头与水（晋江金牌推荐超高积分01-13更新至完结）.txt
       size: 4.07 MB
        url: http://pan.baidu.com/s/1geJHGll
        pwd: 
description: /
  available: 1
  sharetime: 2017-02-14 07:37:00
*************************** 4. row ***************************
         id: 4
        tid: 1
        cid: 8
        uid: 0
      title: （微博：小小精灵玩家520）政宗くんのリベンジ 09.mp4
       size: 195.44 MB
        url: http://pan.baidu.com/s/1c13Bcp6
        pwd: 
description: 高清动漫下载区/2017年1月新番/政宗君的复仇/（微博：小小精灵玩家520）政宗くんのリベンジ 09.mp4
  available: 1
  sharetime: 2017-03-04 05:31:00
*************************** 5. row ***************************
         id: 5
        tid: 1
        cid: 1
        uid: 0
      title: 04 Take It （Previously Unreleased）.m4a
       size: 5.71 MB
        url: http://pan.baidu.com/s/1ntAxIZJ
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
*************************** 6. row ***************************
         id: 6
        tid: 1
        cid: 1
        uid: 0
      title: 表情.zip
       size: 4.96 MB
        url: http://pan.baidu.com/s/1gdd1XYV
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
*************************** 7. row ***************************
         id: 7
        tid: 1
        cid: 1
        uid: 0
      title: 【艾薇儿饭团】07年Flare杂志.rar
       size: 563.13 KB
        url: http://pan.baidu.com/share/link?shareid=3408670202&uk=1042245391
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
*************************** 8. row ***************************
         id: 8
        tid: 1
        cid: 1
        uid: 0
      title: 【艾薇儿饭团】2003滚石杂志.rar
       size: 3 MB
        url: http://pan.baidu.com/share/link?shareid=424894405&uk=1042245391
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
*************************** 9. row ***************************
         id: 9
        tid: 1
        cid: 1
        uid: 0
      title: 【饭团资源】致敬艾薇儿.rar
       size: 75.64 MB
        url: http://pan.baidu.com/share/link?shareid=1371654997&uk=1042245391
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
*************************** 10. row ***************************
         id: 10
        tid: 1
        cid: 1
        uid: 0
      title: AVRIL.Candy.zip
       size: 4.33 MB
        url: http://pan.baidu.com/s/1ntCy8sx
        pwd: 
description: /
  available: 1
  sharetime: 2017-03-12 17:16:00
10 rows in set (0.00 sec)

项目下载地址

sbdspider

参考资料

关注公众号尹安灿

nginx日志分割处理

Posted on 2017-03-27 Edited on 2025-06-11 In operations

nginx现在用yum源安装的话，都自带了logrotate，默认是每天定时分割日志并压缩。

但是公司目前有一台生产环境的nginx不是用yum安装的。所以就得自己手动分割下日志了。

目前就是bash脚本+crontab实现，具体代码如下：

#!/bin/bash
# Rotate the nginx access/error logs: rename yesterday's logs, make nginx
# reopen its log files, compress the renamed logs, and prune old archives.

# Log directory
LOGS_PATH=/app/nginx/logs
YESTERDAY=$(date -d "yesterday" +%Y%m%d)
# Access log path + base name
ACCESS_FILE_NAME=${LOGS_PATH}/sxyz_erp_nginx_access_${YESTERDAY}
# Error log path + base name
ERROR_FILE_NAME=${LOGS_PATH}/sxyz_erp_nginx_error_${YESTERDAY}

# Rename the live logs.  nginx keeps writing to the renamed files through its
# open file descriptors until it is told to reopen them.
mv "${LOGS_PATH}/access.log" "${ACCESS_FILE_NAME}.log"
mv "${LOGS_PATH}/error.log" "${ERROR_FILE_NAME}.log"

# USR1 makes nginx reopen its log files (it does not reload the
# configuration).  This must happen BEFORE compressing and deleting:
# otherwise everything nginx writes in between is lost with the deleted file.
kill -USR1 "$(cat "${LOGS_PATH}/nginx.pid")"
# Give the worker processes a moment to switch to the new files.
sleep 1

# Compress each rotated log; delete the source only if tar succeeded.
tar -zcf "${ACCESS_FILE_NAME}.tar.gz" "${ACCESS_FILE_NAME}.log" \
    && rm -f "${ACCESS_FILE_NAME}.log"
tar -zcf "${ERROR_FILE_NAME}.tar.gz" "${ERROR_FILE_NAME}.log" \
    && rm -f "${ERROR_FILE_NAME}.log"

# Delete archives older than 20 days.  Abort if the directory is missing so
# that find never runs (and deletes) in an unexpected working directory.
cd "${LOGS_PATH}" || exit 1
find . -type f -mtime +20 -name "sxyz_erp_nginx_*" -exec rm -f {} +

exit 0

关注公众号尹安灿

密码的管理和使用

Posted on 2017-03-26 Edited on 2025-06-11 In security

一个弱口令往往是入侵的突破口之一。
拿IT从业者云集的CSDN600多万个密码统计出来的一个使用频次最高的密码列表，绝对是弱口令的佼佼者。

如下，弱口令不仅限于以下密码：

重复次数	密码	占比
235012	123456789	(5.82013097845522)
212749	12345678	(5.26878221339919)
76346	11111111	(1.89072779126658)
46053	dearbook	(1.14051406715741)
34952	00000000	(0.865595024760297)
19986	123123123	(0.494958290365624)
17790	1234567890	(0.440573800940881)
15033	88888888	(0.372296006157632)
6995	111111111	(0.173232925102949)
5965	147258369	(0.14772471740373)
5553	987654321	(0.137521434324042)
5459	aaaaaaaa	(0.135193500805861)
5145	1111111111	(0.127417212245128)
5025	66666666	(0.124445382221918)
4435	a123456789	(0.109833884607803)
4096	11223344	(0.101438464792234)
3667	1qaz2wsx	(0.0908141724592587)
3649	xiazhili	(0.0903683979557772)
3610	789456123	(0.0894025531982339)
3501	password	(0.0867031409271515)
3281	87654321	(0.0812547858845999)
3277	qqqqqqqq	(0.0811557248838262)
3175	000000000	(0.0786296693640977)
3143	qwertyuiop	(0.0778371813579084)
3094	qq123456	(0.076623684098431)
3080	iloveyou	(0.0762769705957231)
3061	31415926	(0.0758064308420482)
2985	12344321	(0.0739242718273486)
2885	0000000000	(0.0714477468080069)
2826	asdfghjkl	(0.0699865970465953)
2796	1q2w3e4r	(0.0692436395407928)
2580	123456abc	(0.0638943454990148)
2578	0123456789	(0.063844814998628)
2573	123654789	(0.0637209887476609)
2540	12121212	(0.0629037354912782)
2515	qazwsxedc	(0.0622846042364428)
2396	abcd1234	(0.0593375394634262)
2380	12341234	(0.0589412954603315)
2348	110110110	(0.0581488074541422)
2296	asdasdasd	(0.0568610144440845)
2243	22222222	(0.0555484561838334)
2166	123321123	(0.0536415319189404)
2160	abc123456	(0.0534929404177799)
2138	a12345678	(0.0529481049135247)
2131	123456	(0.0527747481621708)
2113	123456123	(0.0523289736586893)
2106	a1234567	(0.0521556169073354)
2100	1234qwer	(0.0520070254061749)
1989	qwertyui	(0.0492580826347056)
1986	123456789a	(0.0491837868841254)
1971	aa123456	(0.0488123081312241)
1918	asdfasdf	(0.047499749870973)
1891	99999999	(0.0468310881157508)
1859	123456aa	(0.0460386001095615)
1859	999999999	(0.0460386001095615)
1854	123456123456	(0.0459147738585944)
1699	520520520	(0.0420761600786148)
1656	963852741	(0.0410112543202979)
1652	55555555	(0.0409121933195242)
1652	741852963	(0.0409121933195242)
1589	33333333	(0.039351982557339)
1480	qwer1234	(0.0366525702862566)
1384	asd123456	(0.0342751062676886)
1339	77777777	(0.0331606700089848)
1316	qweasdzxc	(0.0325910692545363)
1285	code8925	(0.0318233464985403)
1273	11112222	(0.0315261634962193)
1268	ms0083jxj	(0.0314023372452523)
1245	zzzzzzzz	(0.0308327364908037)
1214	111222333	(0.0300650137348078)
1206	qweqweqwe	(0.0298668917332604)
1200	3.1415926	(0.0297183002320999)
1183	123456qq	(0.0292972909788118)
1148	147852369	(0.0284305072220423)
1136	521521521	(0.0281333242197213)
1119	asdf1234	(0.0277123149664332)
1111	123698745	(0.0275141929648858)
1109	1123581321	(0.027464662464499)
1058	asdfghjk	(0.0262016347046348)
1054	q1w2e3r4	(0.0261025737038611)
1037	12345678a	(0.025681564450573)
1003	woaini1314	(0.0248395459439969)
991	1234abcd	(0.0245423629416759)
988	123qweasd	(0.0244680671910956)
975	1qazxsw2	(0.0241461189385812)
967	woaiwojia	(0.0239479969370339)
920	321321321	(0.0227840301779433)
910	05962514787	(0.0225363776760091)
894	123456987	(0.0221401336729144)
892	kingcom5	(0.0220906031725276)
882	5845201314	(0.0218429506705934)
882	zxcvbnm123	(0.0218429506705934)
852	0987654321	(0.0210999931647909)
847	wwwwwwww	(0.0209761669138239)
835	11111111111111111111	(0.0206789839115029)
805	12345600	(0.0199360264057004)
783	11235813	(0.0193911909014452)
777	1q2w3e4r5t	(0.0192425994002847)
772	10101010	(0.0191187731493176)
770	123456asd	(0.0190692426489308)
765	lilylily	(0.0189454163979637)
744	12345612	(0.018425346143902)
741	5201314520	(0.0183510503933217)
740	1234554321	(0.0183262851431283)
732	12301230	(0.018128163141581)
729	woshishui	(0.0180538673910007)
727	123456654321	(0.0180043368906139)
726	xiaoxiao	(0.0179795716404205)
713	qwe123456	(0.017657623387906)
708	woaini123	(0.017533797136939)
702	111111	(0.0173852056357785)
693	1122334455	(0.0171623183840377)
685	12369874	(0.0169641963824904)
680	12345679	(0.0168403701315233)
669	100200300	(0.0165679523793957)
657	ffffffff	(0.0162707693770747)
651	buzhidao	(0.0161221778759142)
650	44444444	(0.0160974126257208)
649	woainima	(0.0160726473755274)
642	z123456789	(0.0158992906241735)
623	1234567a	(0.0154287508704985)
621	123456aaa	(0.0153792203701117)
618	qazwsx123	(0.0153049246195315)
616	ssssssss	(0.0152553941191446)
608	wojiushiwo	(0.0150572721175973)
601	25257758	(0.0148839153662434)
592	123321aa	(0.0146610281145026)
589	1357924680	(0.0145867323639224)
585	aaa123456	(0.0144876713631487)
578	369258147	(0.0143143146117948)
572	321654987	(0.0141657231106343)
571	q123456789	(0.0141409578604409)
570	qaz123456	(0.0141161926102475)
567	1233211234567	(0.0140418968596672)
567	9876543210	(0.0140418968596672)
565	wocaonima	(0.0139923663592804)
562	1234567b	(0.0139180706087001)
562	zhang123	(0.0139180706087001)
561	woaini520	(0.0138933053585067)
559	csdncsdn	(0.0138437748581199)
559	google250	(0.0138437748581199)
556	yangyang	(0.0137694791075396)
553	5845211314	(0.0136951833569594)
536	369369369	(0.0132741741036713)
535	20082008	(0.0132494088534779)
532	135792468	(0.0131751131028976)
525	299792458	(0.0130017563515437)
521	dddddddd	(0.0129026953507701)
519	zxczxczxc	(0.0128531648503832)
504	computer	(0.012481686097482)
501	qwerasdf	(0.0124073903469017)

为什么我们要使用弱口令

还有比方便和便于记忆这个更好的理由吗？

弱口令戳中了哪些痛点？

便于记忆
便于输入

弱口令就不能使用吗？这倒未必，看使用场景吧。

比如一些安全性较低的网站，而你注册后，可能又不会登记敏感的资料，这就可以使用弱口令。

从而避免网站被入侵后，黑客获取你的密码，社工渗透到你其它敏感的账户。

那么问题来了，应该在怎样的网站使用怎样的密码呢？

这里我说说我个人管理密码的一些策略吧。

密码的使用场景和应用

高频次使用，可以记录密码
高频次使用，不能记录密码，要手动输入
低频次密码，可以记录密码
低频次密码，要手动输入

上面第1、3、4种情况，建议还是使用生成的密码，无规律，很大程度上可以防止被社工出密码，然后在登录的时候选择记住密码就可以了，现在浏览器、app基本都支持记住密码。再配合密码管理工具防止遗忘密码，基本不会有什么问题。

一次麻烦长久受益！

我觉得问题最大这种是高频次使用，但是不能记录密码的。

举个栗子：支付宝等支付密码、keepassx

建议使用比较强壮的密码，可以的话，大小写数字特殊符号混合，长度不小于8位。

这类密码自己可以设计多几套，按场景使用。

密码管理工具

个人是强烈建议使用Keepass的,如果配合TrueCrypt，那就安全性大大增强了。

KeePass官方网站

它有如下优点：

跨平台 windows mac linux Android iPhone 均可以使用
自动输入密码
密码生成
密码记录
密码分类管理

你还会因为怕输入麻烦和忘记密码而使用弱口令吗？

其它密码生成网站推荐：

在线随机密码生成工具

参考资料

CSDN杯我最喜欢的CSDN密码评选

关注公众号尹安灿

Linux下不错的Gif录制工具推荐

Posted on 2017-03-25 Edited on 2025-06-11 In tools

Windows下有很多优秀的录屏工具，前一天，我为了录个GIF，又开了自己的Windows 10.

现在的Ubuntu，越来越优秀了，很多Windows的工具，都能在这里找到替代品，我用Windows似乎越来越少了。

但是两个系统都有自己的优势，都有一些好用到哭的软件。

录屏这方面，其实我用比较多也就OBS Studio和Camtasia Studio，推荐的也就这两个比较多。

前者一般我用于录MP4格式的，后者用于录制GIF。录出来的体积小得感人，清晰度也不错。

OBS现在已经有了for linux版本，提前走向了all platform。Camtasia还没有Linux版本，倒是有了Mac版的。

这让我很眼红Mac。Gif有时候还是要录制的，Linux下有没有过得去的替代品呢？

一番查找还是有的。比如byzanz、peek、OBS+ffmpeg等等，还有ttyrec这等奇葩。

byzanz

这个工具录制出来体积其实是最小的，但是有个很不方便的地方，就是必须先设定录制时长。

我特么大多时候怎么知道我录制屏幕要多久。不过这个就作为备选方案吧，毕竟有时候我还是知道我应该录多久的。

sudo apt install byzanz

peek

相对于 byzanz，有好用的GUI界面，简单的操作，可以拖动选定录制区域，自由决定录制时间。

这个也是我个人的主选方案

安装：

sudo add-apt-repository ppa:peek-developers/stable
sudo apt update
sudo apt install peek

Peek Github项目地址

OBS+ffmpeg

这个我逗你玩。正常情况下，录制MP4再转GIF体积会比MP4还要大。不过如果用OBS录制的时候，调低速率，和减少每秒的帧数，再转换，还是能有不错的体积的。但是，麻不麻烦？

关注公众号尹安灿

svn泄露网站信息

Posted on 2017-03-24 Edited on 2025-06-11 In security

今天老板请团队吃饭，我和小林瞎侃，聊到信息安全。

突然想到之前上线每次都是整个目录复制发布，很有可能存在svn信息泄露问题。

最近一直在忙着架构方面的东西，把这茬的测试给忘了。

一回到家就开了电脑测试，惊出一身冷汗，简直是核弹。

话不多说，看图，

SVN LEAKING information

这里面图片仅仅是展示能够根据这些东西读取出文件目录。这里就发现了一个备份数据库。

实际危害比图片看到的大得多。

这件事情也是对我敲响了一个警钟，安全问题，还是刻不容缓。

怎么修复？

治标方法，删除所有.svn目录。

进入要发布的目录执行，会将所有.svn目录删除
1
2
find . -type d -name '.svn' | xargs rm -rf
治本方法，规范整个代码发布流程。

比如walle。如果用svn这类代码版本管理工具，最好用导出功能，别直接复制，.svn是隐藏目录。或者发布的时候，自己过滤掉这类敏感文件。
未雨绸缪, 设置web服务器拒绝访问这类后缀的文件,顺便把sql文件也捎上。

nginx -t 可以找到你的nginx配置文件
1
2
3
4
5
6
# Deny access to version-control metadata directories (.svn/.git/.cvs) and to
# *.sql dumps.  The dots are escaped and the pattern is anchored; the original
# ".*.(svn|git|cvs|sql)" matched any URI that merely contained "svn", "git",
# "cvs" or "sql" somewhere (e.g. /digital/ or /mysql-guide.html).
location ~* (/\.(svn|git|cvs)(/|$)|\.sql$) {
    deny all;
}

#nginx -s reload

搞定，睡觉，明天估计要修改一大堆东西。

运维日志数据备份同步的几个方案

Posted on 2017-03-17 Edited on 2025-06-11 In operations

最近公司有这么一个需求。某ERP并发不高，web数据不大，单台服务器就能满足需求，暂时不需要负载均衡。

但是对服务高可用和数据安全的要求比较高。所以想多搞一台服务器做灾备，实现高可用和数据备份。

选择服务器

目前业务主要面向华南地区的店，所以服务器得选华南地区的。

主服务在阿里云，从服务器也选阿里云，方便管理，内网同步速度略优。缺点就是阿里云出问题，大家一起挂，另外使用同个账户管理的机器，账户泄露两个机子都遭殃。

选非阿里云的服务器，能更大程度避免单点故障情况发生，缺点是管理略为麻烦，同步可能会稍微慢一点点，还会占用出口带宽。

怎么备份

目前打算至少存在两份异地备份。一份高频近乎实时同步的备份，一份低频定时备份到运维人员机子本地。

前面高频备份是高可用的基础，否则单点故障，备份服务器不能提供完整的数据。

而低频的本地备份可以在出现恶意的攻击，删库等行为后，依然有数据。

而这次备份的重点也是在线的同步备份。

昨天尝试使用 inotify-tools + rsync 用来发现创建修改文件等动作，然后触发 rsync 进行同步。

业务需求能满足，但是不好的是，每次事件触发都进行一次全局扫描，然后做差异备份，有点耗资源。

假如一次性通过命令创建几千个文件，那可能会触发几千次同步。

我想遇到这个问题的肯定不是我一人，度娘之，果然有针对这个问题的解决方案，那就是 sersync + rsync

根据设想的备份流程做了如下图 (绘图软件 dia 不能输入中文囧)
备份流程图

ERP服务器上运行 sersync 当inotify发现文件读写相关事件的时候，就向备份服务器发起同步，进行差异备份。
运维定时主动向备份服务器发起同步请求，同步备份数据回本地。

服务器配置

备份服务器配置

新建一个账户sync_backup作为同步的专用账户，设置用户shell为/sbin/nologin，直接用root太危险了，这样就算密码泄露了，也不会直接导致系统沦陷。

1
2
3

useradd sync_backup -s /sbin/nologin
passwd sync_backup --stdin  #建议用密码生成工具生成一个足够强壮的密码复制黏贴过来。

配置rsync服务

这玩意，你得指定它备份的项目名字，路径，用哪个账户验证，指定认证用户的密码文件。

并将密码文件权限设置为600，保证只有root用户才能获取它的内容。

[root@yzy ~]# vim /etc/rsyncd.conf
# 2017-03-27 By YYY
#
# NOTE: rsyncd.conf only treats a line as a comment when it STARTS with '#'.
# A trailing "# ..." after a value becomes part of that value, so every
# explanation below sits on its own line.
uid = root
gid = root
# "use root = true" from the original file is not a documented rsyncd.conf
# parameter and has been commented out.
# use root = true
# NOTE(review): the original note said this confines clients to the backup
# directory, but that is what "use chroot = yes" does -- confirm which
# behaviour is intended.
use chroot = no
transfer logging = true
read only = no
max connections = 3600
# NOTE(review): "slp refresh" presumably exists only in SLP-patched rsync
# builds -- confirm it is accepted by the installed rsync.
slp refresh = 300
# Source server IP: only the source server may connect.
hosts allow = 1X0.X6.1X8.XXX
# Deny everyone else.  The parameter is spelled "hosts deny"; the original
# "host deny = 0.0.0.0/32" was misspelled and the /32 mask only matched the
# single address 0.0.0.0.
hosts deny = *
pid file = /var/run/rsyncd.pid
lock file = /var/run/rsyncd.lock
log file = /var/log/rsyncd.log
##log format = %h %o %f %l %b
#
[web]
# Backup root path
path = /www/web
comment = Mirror to web server
list = false
# Writable module: the source server pushes files into it.
read only = no
# User allowed to authenticate for this module
auth users = sync_backup
# Password file ("user:password" per line); must be chmod 600.
secrets file = /etc/rsyncd.secrets
# List of paths excluded from syncing; disabled, enable when needed.
##exclude from = /etc/rsync/exclude.txt

密码文件

#vim /etc/rsyncd.secrets
sync_backup:用户密码

chmod 600 /etc/rsyncd.secrets

启动rsync的守护进程并设置开机启动

尽量用vim改别echo 误操作一次就坑爹了，别问我为什么特别注明这个

#/usr/bin/rsync --daemon
#vim /etc/rc.local
/usr/bin/rsync --daemon

配置源服务器

为什么要先配置备份服务器的rsync？因为这样就可以先在源服务器上手动同步一次，看看备份服务器是否能正确接收同步。

这样利于一步一步排错。

sudo rsync -avzP /测试同步的目录/ --password-file=/etc/rsyncd.secrets sync_backup@备份服务IP::web/

/etc/rsyncd.secrets 格式 密码，直接就是密码。

我这里测试没问题，就接着部署了sersync

部署`sersync`

下载安装 sersync

由于上Google Code要翻墙，我从GitHub下载了一份。

wget --no-check-certificate https://raw.githubusercontent.com/orangle/sersync/master/release/sersync2.5.4_64bit_binary_stable_final.tar.gz

Google Code sersync项目地址

sersync项目GitHub镜像

下载项目、解压、移动去 /usr/local/sersync/

wget --no-check-certificate https://raw.githubusercontent.com/orangle/sersync/master/release/sersync2.5.4_64bit_binary_stable_final.tar.gz
tar zxvf sersync2.5.4_64bit_binary_stable_final.tar.gz
sudo mv GNU-Linux-x86/ /usr/local/sersync/

配置sersync

关键的配置我都已经注释了。

<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- sersync configuration. All annotations are real XML comments so the
     file can be used as-is; the original "# ..." notes were stray text
     nodes inside the elements. -->
<head version="2.5">
    <host hostip="localhost" port="8008"></host>
    <debug start="false"/>
    <fileSystem xfs="false"/>
    <!-- File filtering is enabled. -->
    <filter start="true">
	<!-- Excluded patterns: sensitive or unneeded files are not synced. -->
	<exclude expression="(.*)\.svn"></exclude>
	<exclude expression="(.*)\.gz"></exclude>
	<!--exclude expression="^info/*"></exclude-->
	<!--exclude expression="^static/*"></exclude-->
    </filter>
    <!-- Which inotify events trigger a sync; the defaults are fine. -->
    <inotify>
	<delete start="true"/>
	<createFolder start="true"/>
	<createFile start="false"/>
	<closeWrite start="true"/>
	<moveFrom start="true"/>
	<moveTo start="true"/>
	<attrib start="false"/>
	<modify start="false"/>
    </inotify>

    <sersync>
	<!-- Local directory to watch and sync. -->
	<localpath watch="/www/web/shixin_dev_sync_test">
	    <!-- Backup server: its IP and the rsync module name. -->
	    <remote ip="123.184.19.202" name="web"/>
	    <!--<remote ip="192.168.8.39" name="tongbu"/>-->
	    <!--<remote ip="192.168.8.40" name="tongbu"/>-->
	</localpath>
	<rsync>
	    <!-- Options passed to rsync. -->
	    <commonParams params="-artuz"/>
	    <!-- Account on the backup server; remember to set start="true". -->
	    <auth start="true" users="sync_backup" passwordfile="/etc/rsyncd.secrets"/>
	    <!-- Custom rsync port; disabled here (start="false"). -->
	    <userDefinedPort start="false" port="874"/><!-- port=874 -->
	    <timeout start="false" time="100"/><!-- timeout=100 -->
	    <ssh start="false"/>
	</rsync>
	<failLog path="/tmp/rsync_fail_log.sh" timeToExecute="60"/><!--default every 60mins execute once-->
	<crontab start="false" schedule="600"><!--600mins-->
	    <crontabfilter start="false">
		<exclude expression="*.php"></exclude>
		<exclude expression="info/*"></exclude>
	    </crontabfilter>
	</crontab>
	<plugin start="false" name="command"/>
    </sersync>

    <plugin name="command">
	<param prefix="/bin/sh" suffix="" ignoreError="true"/>	<!--prefix /opt/tongbu/mmm.sh suffix-->
	<filter start="false">
	    <include expression="(.*)\.php"/>
	    <include expression="(.*)\.sh"/>
	</filter>
    </plugin>

    <plugin name="socket">
	<localpath watch="/opt/tongbu">
	    <deshost ip="192.168.138.20" port="8009"/>
	</localpath>
    </plugin>
    <plugin name="refreshCDN">
	<localpath watch="/data0/htdocs/cms.xoyo.com/site/">
	    <cdninfo domainname="ccms.chinacache.com" port="80" username="xxxx" passwd="xxxx"/>
	    <sendurl base="http://pic.xoyo.com/cms"/>
	    <regexurl regex="false" match="cms.xoyo.com/site([/a-zA-Z0-9]*).xoyo.com/images"/>
	</localpath>
    </plugin>
</head>

启动并设置开机启动

sudo /usr/local/sersync/sersync2 -r -d -o /usr/local/sersync/confxml.xml # 启动守护进程
vim /etc/rc.local
/usr/local/sersync/sersync2 -r -d -o /usr/local/sersync/confxml.xml

参考资料

关注公众号尹安灿

CentOS7安装Scrapy和一些爬虫会用到的服务

Posted on 2017-03-08 Edited on 2025-06-11 In python

腾讯云这几天搞活动，免费领了三个月的VPS。所以想把本地虚拟机的scrapy爬虫迁移上去。

Scrapy是基于Python写的。本来是要安装Python的，但是现在的Linux发行版基本都默认安装了Python，所以这步略过。

我本地是Ubuntu，服务器我装了CentOS7.2。别问我为啥不用Debian，我瞎选的，没啥特别理由。

自己私人使用，为了方便，直接是root开干。

安装pip包管理

yum install -y python-pip

配置pip安装源为阿里云 http://mirrors.aliyun.com/help/pypi。

我觉得这是天朝必备的一个步骤，否则安装软件那龟速不忍直视。

mkdir ~/.pip
vim ~/.pip/pip.conf

# 改为如下内容
[global]
index-url = http://mirrors.aliyun.com/pypi/simple/

[install]
trusted-host=mirrors.aliyun.com

3, 安装编译器和开发库之类的，假如你们也没安装的话

yum -y install gcc gcc-c++ kernel-devel python-devel libxslt-devel libffi-devel openssl-devel

安装主角Scrapy

pip install scrapy

安装MySQL-python

Python连接MySQL的模块

easy_install MySQL-python

如果遇到mysql_config not found
之前的博文有提过这个错误，不过是在Ubuntu下的解决方案-安装libmysqlclient-dev
然而在CentOS的源中并没有这个包。而安装mysql-devel即可。

yum install -y mysql-devel

安装MySQL数据库

yum groupinstall mariadb mariadb-client

MariaDB其实可以看做MySQL新版吧，具体缘由自行百度。

安装redisNoSQL数据库
会用到配合bloomfilter做url去重。

yum install -y redis

安装 BloomFilter 模块

pip install bloomfilter

至此就差不多了，目前我就用到这些，另外requests和beautifulsoup4模块也装一下。

关注公众号尹安灿

python从零写一个采集器:入库MySQL

Posted on 2017-02-20 Edited on 2025-06-11 In python

采集到的数据，总归是要保存起来，保存到文件，或者数据库，方便以后取用。

思前想后，还是决定采用MySQL数据库，新版本叫MariaDB了，不过都是通用的。

安装`MySQL-python`模块

sudo easy_install MySQL-python

Issue:安装MySQL-Python出错mysql_config not found

插入数据

我这里MySQL位于我虚拟机172.25.254.18，数据库名yzy_data,用户名yzy_data，密码pass

我新建了一个表用于保存标题、用户ID、网盘URL
表结构如下

CREATE TABLE `test` (
  `id` int(11) NOT NULL,
  `title` varchar(255) NOT NULL,
  `uid` varchar(30) NOT NULL,
  `purl` varchar(255) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

ALTER TABLE `test`
  ADD PRIMARY KEY (`id`);

ALTER TABLE `test`
  MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;

新增了一个ID的字段，方便索引数据，这个字段我设置是自增的，所以平时插入数据留空即可。

插入一条数据，标题：测试插入、用户ID：123321、网盘URL：http://pan.baidu.com/xxx

代码如下

# -*- coding: utf-8 -*-
# Insert a single row into the `test` table through the MySQLdb driver.
#
# The encoding declaration above is required on Python 2 because this file
# contains a non-ASCII string literal.
import MySQLdb

# Connect to the database, using utf8 as the connection charset.
conn = MySQLdb.connect(host="172.25.254.18",user="yzy_data",passwd="pass",db="yzy_data",charset="utf8")
try:
    cursor = conn.cursor()
    try:
        # Parameterized insert: the values are passed separately from the SQL
        # text so the driver handles quoting of each %s placeholder.
        sql = "insert into `test`(title,uid,purl) values(%s,%s,%s)"
        param = ("测试插入","123321","http://pan.baidu.com/xxx")
        n = cursor.execute(sql,param)
        # Prints "insert <n>" on both Python 2 and Python 3.
        print("insert %s" % n)
        # NOTE: no conn.commit() here because the `test` table uses the
        # non-transactional MyISAM engine; add conn.commit() at this point
        # when writing to a transactional engine such as InnoDB.
    finally:
        # Release the cursor even if the insert raised.
        cursor.close()
finally:
    # Always close the connection so it is not leaked.
    conn.close()

我这里用的是MyISAM表引擎，所以并不需要在插入后调用conn.commit()来提交才能写入数据。像InnoDB这类支持事务的表引擎则都需要。

参考资料

python下的MySQLdb使用

关注公众号尹安灿

LANMP环境一键部署工具

Posted on 2017-02-18 Edited on 2025-06-11 In resources

部署Linux+Apache+nginx+MySQL+phpmyadmin。平时部署还是略显麻烦，尤其用于非生产环境的话，感觉好浪费体力。之前也一直想写个类似的脚本，一直懒得真正动手付诸实践。偶然遇见这个工具，感觉好方便。能满足需要。

https://lnmp.org/

关注公众号尹安灿

安装MySQL-Python出错mysql_config not found

Posted on 2017-02-18 Edited on 2025-06-11 In troubleshooting

用Python时需要用到MySQL，遂想安装一个MySQL-python，但是遇到如下错误：

➜  ~ git:(master) ✗ sudo easy_install MySQL-python
[sudo] password for phan: 
Searching for MySQL-python
Reading https://pypi.python.org/simple/MySQL-python/
Best match: MySQL-python 1.2.5
Downloading https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c
Processing MySQL-python-1.2.5.zip
Writing /tmp/easy_install-oRVY0O/MySQL-python-1.2.5/setup.cfg
Running MySQL-python-1.2.5/setup.py -q bdist_egg --dist-dir /tmp/easy_install-oRVY0O/MySQL-python-1.2.5/egg-dist-tmp-tj3z87
sh: 1: mysql_config: not found
Traceback (most recent call last):
  File "/usr/bin/easy_install", line 9, in <module>
    load_entry_point('setuptools==20.7.0', 'console_scripts', 'easy_install')()
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 2293, in main
    distclass=DistributionWithoutHelpCommands, **kw
  File "/usr/lib/python2.7/distutils/core.py", line 151, in setup
    dist.run_commands()
  File "/usr/lib/python2.7/distutils/dist.py", line 953, in run_commands
    self.run_command(cmd)
  File "/usr/lib/python2.7/distutils/dist.py", line 972, in run_command
    cmd_obj.run()
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 414, in run
    self.easy_install(spec, not self.no_deps)
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 663, in easy_install
    return self.install_item(spec, dist.location, tmpdir, deps)
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 693, in install_item
    dists = self.install_eggs(spec, download, tmpdir)
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 873, in install_eggs
    return self.build_and_install(setup_script, setup_base)
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 1101, in build_and_install
    self.run_setup(setup_script, setup_base, args)
  File "/usr/lib/python2.7/dist-packages/setuptools/command/easy_install.py", line 1087, in run_setup
    run_setup(setup_script, args)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 246, in run_setup
    raise
  File "/usr/lib/python2.7/contextlib.py", line 35, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 195, in setup_context
    yield
  File "/usr/lib/python2.7/contextlib.py", line 35, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 166, in save_modules
    saved_exc.resume()
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 141, in resume
    six.reraise(type, exc, self._tb)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 154, in save_modules
    yield saved
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 195, in setup_context
    yield
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 243, in run_setup
    DirectorySandbox(setup_dir).run(runner)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 273, in run
    return func()
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 242, in runner
    _execfile(setup_script, ns)
  File "/usr/lib/python2.7/dist-packages/setuptools/sandbox.py", line 46, in _execfile
    exec(code, globals, locals)
  File "/tmp/easy_install-oRVY0O/MySQL-python-1.2.5/setup.py", line 17, in <module>
  File "/tmp/easy_install-oRVY0O/MySQL-python-1.2.5/setup_posix.py", line 43, in get_config
  File "/tmp/easy_install-oRVY0O/MySQL-python-1.2.5/setup_posix.py", line 25, in mysql_config
EnvironmentError: mysql_config not found

度娘后，安装了libmysqlclient-dev解决

➜ ~ git:(master) ✗ sudo apt-get install libmysqlclient-dev

安装MySQL-python 搞定

➜  ~ git:(master) ✗ sudo easy_install MySQL-python         
Searching for MySQL-python
Reading https://pypi.python.org/simple/MySQL-python/
Best match: MySQL-python 1.2.5
Downloading https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c
Processing MySQL-python-1.2.5.zip
Writing /tmp/easy_install-RxRikL/MySQL-python-1.2.5/setup.cfg
Running MySQL-python-1.2.5/setup.py -q bdist_egg --dist-dir /tmp/easy_install-RxRikL/MySQL-python-1.2.5/egg-dist-tmp-oH_bfi
zip_safe flag not set; analyzing archive contents...
Moving MySQL_python-1.2.5-py2.7-linux-x86_64.egg to /usr/local/lib/python2.7/dist-packages
Adding MySQL-python 1.2.5 to easy-install.pth file

Installed /usr/local/lib/python2.7/dist-packages/MySQL_python-1.2.5-py2.7-linux-x86_64.egg
Processing dependencies for MySQL-python
Finished processing dependencies for MySQL-python

关注公众号尹安灿

项目下载地址

参考资料

为什么我们要使用弱口令

密码的使用场景和应用

密码管理工具

参考资料

byzanz

peek

OBS+ffmpeg

怎么修复？

相关工具下载

选择服务器

怎么备份

服务器配置

备份服务器配置

配置源服务器

部署sersync

参考资料

安装MySQL-python模块

插入数据

参考资料

部署`sersync`

安装`MySQL-python`模块