前言

最近下了某个数据库,十几GB的纯文本数据也没法直接看,就写了个脚本自动导进数据库

执行效果

本文仅分享数据处理脚本,不提供数据及其下载方式

QQ

'''
# @Author       : Chr_
# @Date         : 2020-11-25 15:51:26
# @LastEditors  : Chr_
# @LastEditTime : 2020-11-25 18:03:46
# @Description  : QQ
'''
import pymysql
# Bulk-load a '----'-delimited text file into the MySQL table `shegong`.`qq`.
#
# Adjust the connection settings below for your own database. The INSERT
# fills the `qq` and `phone` columns of the table (the table is described as
# also having an `id` column, which this script does not set).
#
# Keyword arguments are used because PyMySQL 1.0 and later no longer accept
# positional parameters in connect().
db = pymysql.connect(host="localhost", user="root",
                     password="123456", database="shegong")
sql = 'INSERT INTO `shegong`.`qq`(`qq`, `phone`) VALUES (%s, %s)'
cursor = db.cursor()

# Number of successful inserts between two commits.
batch_size = 100000

# Resume marker. If a run was interrupted, set this to the `qq` value of the
# last row already stored in the table and run the script again: every line
# up to and including the marker is skipped. Set it to None to import the
# file from its first line.
resume_after = '3523406341'

print('开始处理')
try:
    with open('6.9更新总库.txt', encoding='utf-8') as f:
        committed_batches = 0
        pending = 0
        skipping = resume_after is not None
        for line in f:
            # Remove the line terminator before splitting; otherwise it stays
            # attached to the last field and is written into `phone`.
            fields = line.rstrip('\r\n').split('----')
            if len(fields) < 2:
                # Blank line or line without a separator: there is no
                # first/last pair to extract, so skip it instead of aborting
                # the whole import. Only report it once past the resume point.
                if not skipping:
                    print(f'格式错误,已跳过 {line!r}')
                continue
            # First field is the qq value, last field is the phone value;
            # anything in between is ignored.
            q, p = fields[0], fields[-1]

            if skipping:
                if q == resume_after:
                    skipping = False
                    print('找到结尾')
                # The marker row itself is already in the table, so it is
                # skipped too rather than inserted a second time.
                continue

            try:
                cursor.execute(sql, (q, p))
            except Exception as e:
                # Deliberately broad: one bad row must not stop a multi-GB
                # import. Report it and move on to the next line.
                print(f'存取出错 {q} {p} {e}')
                continue

            pending += 1
            if pending >= batch_size:
                db.commit()
                pending = 0
                committed_batches += 1
                print(f'{committed_batches * batch_size} 条提交完毕')
    print('文件尾')
    # Commit whatever is left of the final, partially filled batch.
    db.commit()
finally:
    # Release the cursor and the connection even when the import fails.
    cursor.close()
    db.close()
print('导入完成')

微博

'''
# @Author       : Chr_
# @Date         : 2020-11-25 16:24:16
# @LastEditors  : Chr_
# @LastEditTime : 2020-11-25 18:17:13
# @Description  : weibo
'''
import pymysql
# Bulk-load a whitespace-delimited text file into the MySQL table
# `shegong`.`wb`.
#
# Adjust the connection settings below for your own database. The INSERT
# fills the `uid` and `phone` columns of the table (the table is described as
# also having an `id` column, which this script does not set).
#
# Keyword arguments are used because PyMySQL 1.0 and later no longer accept
# positional parameters in connect().
db = pymysql.connect(host="localhost", user="root",
                     password="123456", database="shegong")
sql = 'INSERT INTO `shegong`.`wb`(`uid`, `phone`) VALUES (%s, %s)'
cursor = db.cursor()

# Number of successful inserts between two commits.
batch_size = 100000

# Resume marker. If a run was interrupted, set this to the value (compared
# against the LAST field of each line) of the last row already stored in the
# table and run the script again: every line up to and including the marker
# is skipped. Set it to None to import the file from its first line.
# NOTE(review): this value looks like a phone number, while the code treats
# the last field as `uid` — confirm the column order of the input file.
resume_after = '15890981333'

print('开始处理')
try:
    with open('微博五亿2019.txt', encoding='utf-8') as f:
        committed_batches = 0
        pending = 0
        skipping = resume_after is not None
        for line in f:
            # split() without arguments splits on any whitespace run and
            # already discards the trailing newline.
            fields = line.split()
            if len(fields) < 2:
                # Blank line or single token: there is no first/last pair to
                # extract, so skip it instead of aborting the whole import.
                # Only report it once past the resume point.
                if not skipping:
                    print(f'格式错误,已跳过 {line!r}')
                continue
            # First field is bound to the phone value, last field to the uid
            # value; anything in between is ignored.
            p, uid = fields[0], fields[-1]

            if skipping:
                if uid == resume_after:
                    skipping = False
                    print('找到结尾')
                # The marker row itself is already in the table, so it is
                # skipped too rather than inserted a second time.
                continue

            try:
                cursor.execute(sql, (uid, p))
            except Exception as e:
                # Deliberately broad: one bad row must not stop a multi-GB
                # import. Report it and move on to the next line.
                print(f'存取出错 {uid} {p} {e}')
                continue

            pending += 1
            if pending >= batch_size:
                db.commit()
                pending = 0
                committed_batches += 1
                print(f'{committed_batches * batch_size} 条提交完毕')
    print('文件尾')
    # Commit whatever is left of the final, partially filled batch.
    db.commit()
finally:
    # Release the cursor and the connection even when the import fails.
    cursor.close()
    db.close()
print('导入完毕')
最后修改:2020 年 12 月 02 日 02 : 15 PM