怎么快速对mysql全表扫描
尽可能使用主键或者索引
使用主键缩小范围:
SELECT * FROM tablename where uid >= 7948212 and uid < 7948312 ORDER BY uid LIMIT 100
explain 性能:
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t_userfriends range PRIMARY PRIMARY 4 1 100.00 Using where
使用主键比 limit + offset 快得多, 特别是100w行以上的大表
使用偏移量:
SELECT * FROM tablename ORDER BY uid LIMIT 100, OFFSET 100200
explain 性能:
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t_userfriends ALL 2789921 100.00
offset越大, mysql引擎查询时, 跳过的行越多, 越耗时
使用并发查询
开启N个协程或线程, 根据实际的mysql配置来评估, 一般推荐3-10
每个协程读取的主键范围,独自计算, 是唯一的, 起始主键都不同
// 并行读取db,根据主键范围
func (a *Repo) scanDbV3() {
var ctx = context.Background()
var l sync.Mutex
var wg sync.WaitGroup
var totalNum int64
var dirtyNum int64
var info dbModel.Info
var start = time.Now()
var batchSize int64 = 3
var pageSize int64 = 100
var minUid int64 = 100000
var maxUid int64 = 10000000
// 寻找最大uid
err := a.db.Select("uid", "list").Order("uid DESC").Take(&info).Error
if err != nil {
log.Printf("寻找最大uid failed:%v\n", err)
return
}
maxUid = info.Uid
log.Printf("数据表中最大uid: %v\n", maxUid)
readDbRows := func(startUid int64) {
defer wg.Done()
for {
var infos = make([]*dbModel.Info, 0, pageSize)
err := a.db.Select("uid", "list").Where("uid > ? AND uid <= ?", startUid, startUid+pageSize).Order("uid").Find(&infos).Error
if err != nil {
log.Printf("协程读取数据出错,startUid %d: %v\n", startUid, err)
break
}
if len(infos) == 0 && startUid > maxUid {
log.Printf("协程读取数据完毕,startUid %d\n", startUid)
break
}
atomic.AddInt64(&totalNum, int64(len(infos)))
// 处理查询到的记录
wg.Add(1)
task := func() {
defer wg.Done()
// 处理业务
}
err = a.pool.Submit(task)
if err != nil {
log.Printf("pool.Submit failed:%v\n", err)
}
startUid += batchSize * pageSize // 4 个协程,每个协程偏移量间隔 400
}
}
// 启动 4 个协程
wg.Add(int(batchSize))
for i := int64(0); i < batchSize; i++ {
go readDbRows(minUid + i*pageSize)
}
defer func() {
cost := time.Now().Sub(start)
log.Printf("花费时间:%v, 表格上的人数:%v, 处理用户数据异常的玩家人数:%v \n", cost, totalNum, dirtyNum)
}()
wg.Wait()
}