参考：BV1Jq4y1p7Rw

Redis数据库设计：hashtable

hashtable：里面只保存index，而index是指向 entry 的，entry 是有 next 指针的。
hash冲突：采用头插法，Redis是内存，表示你先插入的数据，可能最先访问到。
在这里插入图片描述

rehash机制

扩容搬运时机 1：访问的时候，定位到 old_index，然后把 old_index 下面的链表搬到 new_index 中。
扩容搬运时机 2：后台还有一个会定时轮询搬运，一次搬运 100 个。（如果直接全部搬运，数据量大的时候，可能会引起卡顿）

两个同时存在，怎么访问呢？
访问：先访问 old ，old 不存在，再访问 new。
插入：会插入到 new 不会插入到 old。
在这里插入图片描述

Redis key的数据类型

Redis 的 value 类型有很多：String，List，Set，ZSet，Hash，Geo，Hyperloglog，Bitmap，

那 Redis 的 key 呢，你不论传什么，传音频，视频，server 端也会转换为 String 类型。
在这里插入图片描述

key String 类型设计

string 在 c语言默认是char数组表示的：char [] data=“abc\0”
注意：虽然 \0 我们看不到，但是编译器会隐式帮我们加上 \0 字符串。因为 \0 表示字符串的结尾。

redis 如果设计：
key: data="abc\0def"
这样在我们只能读取到 abc   因为读到 \0 就已经结束了。

Redis 的字符串表示法：
sds：simple dynamic string

int len:7
char[]buf="abc\0def"       注意:  \0 只表示1个长度

这样就可以避免，读取到 \0 就结束了。

int len:7
char[]buf="abc\0def"
对于这个字符串，如果我想添加一个字符 g，怎么去处理呢？最先想到的就是对它进行copy 一份。
"abc\0def" "abc\0defg" 然后，把 char[]buf="abc\0defg"。但是字符串要是超级大，追加一个字符而开辟数组就会浪费。

解决方案：
int len:7
int allo: 0
char[]buf="abc\0def"

把数组 * 2，数组长度是 14。
添加一个字符后，char[]buf="abc\0defg"，len=8 而 allo=6 表示可用空间。
此时再添加字符串 "hi"，此时 char[]buf="abc\0defghi" len=10 allo=4   等到 allo 不够的时候，再扩容两倍。

在这里插入图片描述

RedisDB源码讲解

// Redis database representation.
// There are multiple databases identified by integers from 0 (the default database) up to the max configured database.
// The database number is the 'id' field in the structure. 
// 
// Redis database 表示。
// 有多个数据库由从0(默认数据库)到最大配置数据库的整数标识。
// 数据库编号是结构中的'id'字段。
typedef struct redisDb {//server.h
	// dict 就是 c语言的 hashmap
    dict *dict;                 /* The keyspace for this DB */
    dict *expires;              /* Timeout of keys with a timeout set */
    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP)*/
    dict *ready_keys;           /* Blocked keys that received a PUSH */
    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
    int id;                     /* Database ID */
    long long avg_ttl;          /* Average TTL, just for stats */
    unsigned long expires_cursor; /* Cursor of the active expire cycle. */
    list *defrag_later;         /* List of key names to attempt to defrag one by one, gradually. */
} redisDb;

typedef struct dict {//dict.h
    dictType *type;// 字典类型【详情见下面】
    void *privdata;
    dictht ht[2];// hash index ht[0] ht[1] rehash的两个表，ht就是hash table【详情见下面】
    long rehashidx;/* rehash定位的索引 rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

typedef struct dictType {//dict.h
    // hashFunction：hash函数，计算得到一个hashcode值
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    // keyCompare：k1=k2表示相同的key。k1!=k2，但index一样表示hash冲突
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

// This is our hash table structure.
// Every dictionary has two of this as we implement incremental rehashing, for the old to the new table.
// 
// 这是哈希表结构。
// 当我们实现增量rehash时，每个字典都有两个这样的表，从旧表到新表。
typedef struct dictht {//dict.h
    dictEntry **table;// hashtable 【详情见下面】
    unsigned long size;// hashtable size
    unsigned long sizemask;// size-1 : mod
    unsigned long used;// hashtable 有多少个元素
} dictht;

typedef struct dictEntry {//dict.h
    void *key;// string -> sds
    union {
        void *val;// value -> RedisObject 【详情见下面】
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;// next 指针，解决hash冲突
} dictEntry;

typedef struct redisObject {//server.h
    unsigned type:4;// 4bit string，list，set，zset，hash，...
    unsigned encoding:4;// 编码类型
    unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
                            * LFU data (least significant 8 bits frequency
                            * and most significant 16 bits access time). */
    int refcount;// 4byte 引用计数，引用计数为0，就会回收
    void *ptr;// 指针，拿到真实的 k,v 数据。上面 type，encoding，都是描述信息。
} robj;

在这里插入图片描述

stringObject临界长度44


struct __attribute__ ((__packed__)) sdshdr8 {// sds.h
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
len 8bit=1byte
alloc 8bit=1byte
flags 8bit=1byte
char buf[] = 默认占用1byte 兼容 \0
共 => 4byte

#define LRU_BITS 24
typedef struct redisObject {// server.h
    unsigned type:4;
    unsigned encoding:4;
    unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
                            * LFU data (least significant 8 bits frequency
                            * and most significant 16 bits access time). */
    int refcount;
    void *ptr;
} robj;
type 4bit + encoding 4bit = 1byte
lru 24bit = 3byte
refcount 32bit = 4byte
ptr 64bit = 8byte
共 => 16byte


所以一个 stringObject => 默认占用20字节
CPU缓存行 cache line 默认 64字节，64-20=44 也就是 embstr 最大长度，stringObject长度为 45就转为 raw 了，再一块空间放不下，再放另外一块区域。
所以说，embstr回收一次内存，raw回收两次。



127.0.0.1:6379> set k1 v0123456789012345678901234567890123456789012
OK
127.0.0.1:6379> get k1
"v0123456789012345678901234567890123456789012"
127.0.0.1:6379> STRLEN k1
(integer) 44
127.0.0.1:6379> OBJECT encoding k1
"embstr" # 44 长度


127.0.0.1:6379> set k1 v01234567890123456789012345678901234567890123
OK
127.0.0.1:6379> STRLEN k1
(integer) 45
127.0.0.1:6379> OBJECT encoding k1
"raw" # 45 长度

编码转换

int => raw

127.0.0.1:6379> set k1 1
OK
127.0.0.1:6379> OBJECT encoding k1
"int"
127.0.0.1:6379> 
127.0.0.1:6379> 
127.0.0.1:6379> 
127.0.0.1:6379> APPEND k1 " is good num"
(integer) 13
127.0.0.1:6379> OBJECT encoding k1
"raw"

embstr => raw

127.0.0.1:6379> set k1 v1
OK
127.0.0.1:6379> OBJECT encoding k1
"embstr"
127.0.0.1:6379> 
127.0.0.1:6379> 
127.0.0.1:6379> 
127.0.0.1:6379> 
127.0.0.1:6379> APPEND k1 1
(integer) 3
127.0.0.1:6379> STRLEN k1
(integer) 3
127.0.0.1:6379> OBJECT encoding k1
"raw"

结语

（2021-11-05 11:35:27 后面我就没写了，我买了一本《Redis设计与实现》，看完后，也是对Redis内部有更近一步了解了。）

改造自己，总比禁止别人来得难。

Redis源码