```go
	// If neither the keys nor the values of this map contain pointers, the GC
	// does not scan the contents of buckets. Each bucket, however, is really
	// the head of a linked list of overflow buckets; the follow-on nodes of
	// every chain are recorded in this field so that the GC can still reach
	// them and they are not reclaimed.
	extra *mapextra
}
```
```go
// makemap_small implements Go map creation for make(map[k]v) and
// make(map[k]v, hint) when hint is known to be at most bucketCnt
// at compile time and the map needs to be allocated on the heap.
func makemap_small() *hmap {
	h := new(hmap)
	h.hash0 = fastrand() // initialize the hash seed
	return h
}
```
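A quick way to see which entry point a given `make` call lowers to is to inspect the compiler output (`go tool compile -S`) on a snippet like the one below. This is only a sketch: the exact runtime call also depends on escape analysis, and a provably small, non-escaping map may get no runtime call at all.

```go
package main

func main() {
	// No hint, or a hint known at compile time to be at most bucketCnt (8):
	// for a heap-allocated map the compiler emits runtime.makemap_small,
	// and no buckets array is allocated up front.
	m1 := make(map[string]int)
	m2 := make(map[string]int, 8)

	// A larger hint lowers to runtime.makemap, which sizes B so that the
	// requested number of entries fits under the load factor.
	m3 := make(map[string]int, 64)

	_, _, _ = m1, m2, m3
}
```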
```go
// makemap implements Go map creation for make(map[k]v, hint).
// If the compiler has determined that the map or the first bucket
// can be created on the stack, h and/or bucket may be non-nil.
// If h != nil, the map can be created directly in h.
// If h.buckets != nil, bucket pointed to can be used as the first bucket.
func makemap(t *maptype, hint int, h *hmap) *hmap {
	// If hint is negative or too large, reset it to 0.
	if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {
		hint = 0
	}

	// initialize Hmap
	// The compiler may have optimized and created the map on the stack, so h may be non-nil.
	if h == nil {
		h = new(hmap)
	}
	h.hash0 = fastrand()

	// find size parameter which will hold the requested # of elements
	B := uint8(0)
	// Compute B from the requested size. 2^B is the number of buckets; as noted
	// earlier, the map holds at most 6.5*2^B key/value pairs before the load
	// factor is exceeded and a growth is triggered.
	for overLoadFactor(hint, B) {
		B++
	}
	h.B = B

	// allocate initial hash table
	// if B == 0, the buckets field is allocated lazily later (in mapassign)
	// If hint is large zeroing this memory could take a while.
	if h.B != 0 {
		var nextOverflow *bmap
		// Create the bucket array. A few extra overflow bmaps may be allocated
		// up front here, so later overflow buckets can be taken from this pool
		// instead of triggering a fresh allocation.
		h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)
		if nextOverflow != nil {
			h.extra = new(mapextra)
			h.extra.nextOverflow = nextOverflow
		}
	}

	return h
}
```
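To get a feel for the B that falls out of a given hint, here is a simplified model of `overLoadFactor` using the 6.5 average load per bucket. It is a sketch only: the runtime performs this check with integer arithmetic (loadFactorNum/loadFactorDen), not floating point.

```go
package main

import "fmt"

const bucketCnt = 8 // slots per bucket

// overLoadFactor reports whether count elements exceed an average of 6.5
// pairs per bucket across 2^B buckets (simplified sketch of the runtime check).
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && float64(count) > 6.5*float64(uint64(1)<<B)
}

func main() {
	for _, hint := range []int{0, 8, 9, 100, 1000} {
		B := uint8(0)
		for overLoadFactor(hint, B) {
			B++
		}
		fmt.Printf("hint=%4d -> B=%d (%d buckets)\n", hint, B, 1<<B)
	}
}
```

For example, hint=100 yields B=4, since 100 fits under 6.5*16 = 104 but not under 6.5*8 = 52.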
```go
	top := tophash(hash) // top eight bits of the key's hash
	// b is a *bmap; if the key is not in the current bmap, move on to the
	// next one in the overflow chain.
	for ; b != nil; b = b.overflow(t) {
		// Iterate over the current bmap. bucketCnt is the constant 8: a bmap
		// holds at most 8 key/value pairs.
		for i := uintptr(0); i < bucketCnt; i++ {
			// First compare the top eight bits of the hash; on mismatch, continue.
			if b.tophash[i] != top {
				continue
			}
			// Compute the location of the current key.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			// Stored indirectly? (the slot holds the address of the actual key)
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// Compare the keys for equality.
			if alg.equal(key, k) {
				......
			}
		}
	}
```
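For reference, `tophash` just takes the top byte of the hash and bumps it past the values reserved for slot states. The sketch below mirrors the runtime version quoted in this article, where minTopHash is 4 and the values below it encode states such as empty and evacuated; it assumes a 64-bit platform in the example call.

```go
package main

import (
	"fmt"
	"unsafe"
)

const minTopHash = 4 // values below this encode slot states (empty, evacuated, ...)

// tophash extracts the top 8 bits of a hash value, shifting it past the
// reserved state markers so it always denotes an occupied slot.
func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (unsafe.Sizeof(hash)*8 - 8))
	if top < minTopHash {
		top += minTopHash
	}
	return top
}

func main() {
	fmt.Println(tophash(0xABCDEF0123456789)) // prints 171 (0xAB) on 64-bit
}
```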
get
There are several methods related to get:
```go
// val := m[key]
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer

// val, has := m[key]
func mapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool)

// used on the for-range path to fetch both key and val
func mapaccessK(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, unsafe.Pointer)
```
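At the language level these correspond to the three ways a map is read:

```go
package main

import "fmt"

func main() {
	m := map[string]int{"a": 1}

	v := m["a"]      // compiled to mapaccess1
	v2, ok := m["b"] // compiled to mapaccess2; ok reports presence

	// Ranging goes through the iterator (mapiterinit/mapiternext);
	// mapaccessK serves this path, re-locating entries when needed.
	for k, v3 := range m {
		fmt.Println(k, v3)
	}
	fmt.Println(v, v2, ok)
}
```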
```go
// mapaccess1 returns a pointer to h[key].  Never returns nil, instead
// it will return a reference to the zero object for the value type if
// the key is not in the map.
// NOTE: The returned pointer may keep the whole map live, so don't
// hold onto it for very long.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	// If the map is nil or holds nothing, return the zero value directly.
	if h == nil || h.count == 0 {
		return unsafe.Pointer(&zeroVal[0])
	}
	// Maps are not safe for concurrent use: if a write is in progress, panic.
	// When multiple goroutines read and write the same map, access must be
	// protected by a lock.
	if h.flags&hashWriting != 0 {
		throw("concurrent map read and map write")
	}
	alg := t.key.alg
	// Compute the key's hash.
	hash := alg.hash(key, uintptr(h.hash0))
	// Mask for the length of the buckets array.
	m := bucketMask(h.B)
	// Locate the bucket this key falls into.
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	// Is a growth in progress? When the map grows, a new buckets array is
	// allocated and the key/value pairs in the old buckets are migrated over.
	// The migration is done incrementally, not all at once.
	if c := h.oldbuckets; c != nil {
		// Growth comes in two flavors. First, if the number of key/value
		// pairs has reached the 6.5*2^B threshold, the map grows and the new
		// buckets array is twice the length of the old one. Second, the count
		// is below the threshold but there are too many overflow buckets,
		// typically because many pairs were stored and later deleted: some
		// bucket chains are long but sparsely populated, which hurts lookups.
		// That case triggers sameSizeGrow, where the new buckets array keeps
		// the old length.
		// In the doubling case, the mask for the old bucket count is m>>1.
		if !h.sameSizeGrow() {
			// There used to be half as many buckets; mask down one more power of two.
			m >>= 1
		}
		// Work out which bucket of oldbuckets the key falls into.
		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
		// If that bucket has not been evacuated yet, look the key up there;
		// migration proceeds one bucket at a time.
		if !evacuated(oldb) {
			b = oldb
		}
	}
	// Top eight bits of the hash.
	top := tophash(hash)
	// Walk the bmap chain.
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			// Compare the top eight bits of the hash.
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// Compare the keys for equality.
			if alg.equal(key, k) {
				// Fetch the val for this key; the offset formula was covered
				// in the earlier section on the data structures.
				v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				// The slot may hold the address of the val rather than the val itself.
				if t.indirectvalue {
					v = *((*unsafe.Pointer)(v))
				}
				return v
			}
		}
	}
	return unsafe.Pointer(&zeroVal[0])
}
```
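Because of the `throw` above, a map shared between goroutines needs external synchronization. A minimal sketch using sync.RWMutex (sync.Map is the other standard option):

```go
package main

import (
	"fmt"
	"sync"
)

// counter wraps a map with an RWMutex so concurrent readers and writers
// don't trip the runtime's "concurrent map read and map write" check.
type counter struct {
	mu sync.RWMutex
	m  map[string]int
}

func (c *counter) get(k string) int {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.m[k]
}

func (c *counter) inc(k string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.m[k]++
}

func main() {
	c := &counter{m: make(map[string]int)}
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.inc("hits")
			_ = c.get("hits")
		}()
	}
	wg.Wait()
	fmt.Println(c.get("hits")) // 10
}
```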
```go
// Like mapaccess, but allocates a slot for the key if it is not present in the map.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	// Writing to a nil map is not allowed.
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	// Panic directly on a concurrent write.
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	alg := t.key.alg
	// Compute the key's hash.
	hash := alg.hash(key, uintptr(h.hash0))

	// Set hashWriting after calling alg.hash, since alg.hash may panic,
	// in which case we have not actually done a write.
	// Set the write flag.
	h.flags |= hashWriting
	// If the buckets array is nil, create one of length 1.
	if h.buckets == nil {
		h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
	}

again:
	// Index of the target bucket within the buckets array.
	bucket := hash & bucketMask(h.B)
	// If a growth is in progress, do a piece of the migration first. As noted
	// above, migration is incremental, works one bucket at a time, and is
	// driven by write operations.
	if h.growing() {
		// growWork migrates the bucket at bucket%len_of_oldbuckets.
		growWork(t, h, bucket)
	}
	// Locate the bucket to write into.
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	// Top eight bits of the hash.
	top := tophash(hash)

	// Slot within the bmap to insert at; as noted earlier, a bmap holds at
	// most 8 key/value pairs.
	var inserti *uint8
	// Address where the key will be stored.
	var insertk unsafe.Pointer
	// Address of the corresponding val.
	var val unsafe.Pointer
	// The migration above has already run, so the write only touches the new bucket.
	for {
		// Walk the current bmap.
		for i := uintptr(0); i < bucketCnt; i++ {
			// The top eight hash bits differ.
			if b.tophash[i] != top {
				// Remember the first empty slot as a tentative insert
				// position; a later iteration may still find the key already
				// present, in which case its val is returned instead.
				if b.tophash[i] == empty && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				}
				continue
			}
			// The top eight hash bits match, so compare the keys themselves.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// Not equal: keep scanning.
			if !alg.equal(key, k) {
				continue
			}
			// The key already exists: update the stored key if the type
			// requires it, then return the address of its val.
			if t.needkeyupdate {
				typedmemmove(t.key, k, key)
			}
			val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			// Jump straight to the cleanup at done.
			goto done
		}
		// Key not found in this bmap: move on to the next one in the chain.
		ovf := b.overflow(t)
		if ovf == nil {
			// The whole chain has been scanned; stop.
			break
		}
		b = ovf
	}

	// Decide whether to grow first: if no growth is in progress and either
	// the element count would exceed the threshold or there are too many
	// overflow buckets, trigger a growth and restart from the top.
	if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}

	// No growth needed but no free slot either: allocate a new overflow
	// bucket at the tail of the chain (b currently points at the last bmap).
	if inserti == nil {
		// all current buckets are full, allocate a new one.
		newb := h.newoverflow(t, b)
		// Insert into its first slot.
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		val = add(insertk, bucketCnt*uintptr(t.keysize))
	}

	// store new key/value at insert position
	// Handle indirect storage for the key and the val.
	if t.indirectkey {
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectvalue {
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(val) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	h.count++ // one more element in the map

done:
	// Check the write flag.
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	// Clear the write flag.
	h.flags &^= hashWriting
	if t.indirectvalue {
		val = *((*unsafe.Pointer)(val))
	}
	return val
}
```
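The nil check at the top of mapassign is directly observable at the language level: reading a nil map returns zero values, while writing panics. For example:

```go
package main

import "fmt"

func main() {
	var m map[string]int // nil map: no hmap has been allocated

	fmt.Println(m["a"]) // reads are fine: prints 0 (the zero value)

	defer func() {
		fmt.Println("recovered:", recover()) // assignment to entry in nil map
	}()
	m["a"] = 1 // the write triggers the panic in mapassign
}
```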
```go
	// Set hashWriting after calling alg.hash, since alg.hash may panic,
	// in which case we have not actually done a write (delete).
	h.flags |= hashWriting

	bucket := hash & bucketMask(h.B)
	// If a growth is in progress, migrate this bucket first.
	if h.growing() {
		growWork(t, h, bucket)
	}
	b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
	top := tophash(hash)
search:
	// Look for the key to delete. The migration above has already run, so
	// the search only needs to cover the new bucket.
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			k2 := k
			if t.indirectkey {
				k2 = *((*unsafe.Pointer)(k2))
			}
			if !alg.equal(key, k2) {
				continue
			}
			// Clear the entry.
			// Only clear key if there are pointers in it.
			if t.indirectkey {
				*(*unsafe.Pointer)(k) = nil
			} else if t.key.kind&kindNoPointers == 0 {
				memclrHasPointers(k, t.key.size)
			}
			v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			if t.indirectvalue {
				*(*unsafe.Pointer)(v) = nil
			} else if t.elem.kind&kindNoPointers == 0 {
				memclrHasPointers(v, t.elem.size)
			} else {
				memclrNoHeapPointers(v, t.elem.size)
			}
			b.tophash[i] = empty // mark the slot empty
			h.count--            // one element fewer
			break search
		}
	}
```
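At the language level, mapdelete backs the built-in delete. Note that deletion only marks slots empty and decrements h.count; the buckets array never shrinks, which is exactly how the sparse overflow chains that later trigger sameSizeGrow come about:

```go
package main

import "fmt"

func main() {
	m := make(map[int]int)
	for i := 0; i < 1000; i++ {
		m[i] = i
	}
	for i := 0; i < 1000; i++ {
		delete(m, i) // compiled to runtime.mapdelete
	}
	// len reflects h.count, which mapdelete decrements, but the buckets
	// allocated for 1000 entries are still held by the map.
	fmt.Println(len(m)) // 0
}
```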
```go
// A hash iteration structure.
type hiter struct {
	key         unsafe.Pointer // current key; nil means iteration is over
	value       unsafe.Pointer // current val
	t           *maptype
	h           *hmap
	buckets     unsafe.Pointer // bucket ptr at hash_iter initialization time
	bptr        *bmap          // pointer to the bucket currently being iterated
	overflow    *[]*bmap       // keeps overflow buckets of hmap.buckets alive
	oldoverflow *[]*bmap       // keeps overflow buckets of hmap.oldbuckets alive
	startBucket uintptr        // bucket this iteration started at
	offset      uint8          // intra-bucket offset this iteration started at
	wrapped     bool           // already wrapped around from end of bucket array to beginning
	B           uint8
	i           uint8
	bucket      uintptr
	checkBucket uintptr
}
```
```go
func mapiterinit(t *maptype, h *hmap, it *hiter) {
	if h == nil || h.count == 0 {
		return
	}

	it.t = t // record the map's type information
	it.h = h // keep a reference to the map

	// grab snapshot of bucket state
	it.B = h.B             // record the current B
	it.buckets = h.buckets // record the current buckets
	// If the map's keys and values contain no pointers:
	if t.bucket.kind&kindNoPointers != 0 {
		// Allocate the current slice and remember pointers to both current and old.
		// This preserves all relevant overflow buckets alive even if
		// the table grows and/or overflow buckets are added to the table
		// while we are iterating.
		h.createOverflow()
		it.overflow = h.extra.overflow
		it.oldoverflow = h.extra.oldoverflow
	}

	// decide where to start
	// Pick a random starting position; this is why successive for-range
	// loops over the same map can produce different orders.
	r := uintptr(fastrand())
	if h.B > 31-bucketCntBits {
		r += uintptr(fastrand()) << 31
	}
	// Derive the starting bucket and the starting offset within each bucket.
	it.startBucket = r & bucketMask(h.B)
	it.offset = uint8(r >> h.B & (bucketCnt - 1))

	// Index of the bucket currently being iterated.
	it.bucket = it.startBucket

	// Set the iterator flags.
	if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
		atomic.Or8(&h.flags, iterator|oldIterator)
	}
	// Advance to the first element.
	mapiternext(it)
}
```
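The random startBucket and offset are the reason iteration order is deliberately unpredictable; running the following usually prints two different orders:

```go
package main

import "fmt"

func main() {
	m := map[string]int{"a": 1, "b": 2, "c": 3, "d": 4}
	// Two consecutive ranges over the same, unmodified map may visit the
	// keys in different orders, because mapiterinit picks a random
	// startBucket and offset each time.
	for k := range m {
		fmt.Print(k, " ")
	}
	fmt.Println()
	for k := range m {
		fmt.Print(k, " ")
	}
	fmt.Println()
}
```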
```go
func hashGrow(t *maptype, h *hmap) {
	// If we've hit the load factor, get bigger.
	// Otherwise, there are too many overflow buckets,
	// so keep the same number of buckets and "grow" laterally.
	bigger := uint8(1)
	// If the element count has not reached the threshold, the growth was
	// triggered by an excess of overflow buckets instead.
	if !overLoadFactor(h.count+1, h.B) {
		bigger = 0 // keep the bucket count unchanged
		// Set a flag so later code can tell which case triggered the growth.
		// In the overflow-bucket case the number of buckets does not actually
		// increase, hence the name sameSizeGrow.
		h.flags |= sameSizeGrow
	}
```
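Putting the two triggers side by side, here is a simplified model of the conditions mapassign checks before calling hashGrow. This is a sketch only: the real checks use integer load-factor arithmetic, and noverflow is an approximate counter on large maps.

```go
package main

import "fmt"

const bucketCnt = 8

// overLoadFactor: count exceeds an average of 6.5 pairs per bucket.
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && float64(count) > 6.5*float64(uint64(1)<<B)
}

// tooManyOverflowBuckets: roughly as many overflow buckets as regular ones
// (B is capped at 15 so the threshold stays within uint16 range).
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	if B > 15 {
		B = 15
	}
	return noverflow >= uint16(1)<<B
}

func main() {
	// Doubling growth: the count crosses the load-factor threshold.
	fmt.Println(overLoadFactor(105, 4)) // true: 105 > 6.5*16 = 104
	// sameSizeGrow: the count is fine but overflow buckets piled up.
	fmt.Println(tooManyOverflowBuckets(16, 4)) // true: 16 >= 2^4
}
```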