Redis数据库
服务器结构
/* Server-wide state. A single structure that groups the server's
 * configuration, runtime state and statistics. Fields are organized into
 * sections (General, Modules, Networking, stats, AOF/RDB persistence,
 * replication, limits, cluster, ...) introduced by the section comments
 * below. */
struct redisServer {
/* General */
pid_t pid; /* Main process pid. */
pthread_t main_thread_id; /* Main thread id */
char *configfile; /* Absolute config file path, or NULL */
char *executable; /* Absolute executable file path. */
char **exec_argv; /* Executable argv vector (copy). */
int dynamic_hz; /* Change hz value depending on # of clients. */
int config_hz; /* Configured HZ value. May be different than
the actual 'hz' field value if dynamic-hz
is enabled. */
mode_t umask; /* The umask value of the process on startup */
int hz; /* serverCron() calls frequency in hertz */
int in_fork_child; /* indication that this is a fork child */
redisDb *db; /* Databases (see redisDb); the configured count is in 'dbnum'. */
dict *commands; /* Command table */
dict *orig_commands; /* Command table before command renaming. */
aeEventLoop *el; /* Event loop (aeEventLoop). */
rax *errors; /* Errors table */
unsigned int lruclock; /* Clock for LRU eviction */
volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */
mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */
int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */
int shutdown_flags; /* Flags passed to prepareForShutdown(). */
int activerehashing; /* Incremental rehash in serverCron() */
int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */
char *pidfile; /* PID file path */
int arch_bits; /* 32 or 64 depending on sizeof(long) */
int cronloops; /* Number of times the cron function run */
char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */
int sentinel_mode; /* True if this instance is a Sentinel. */
size_t initial_memory_usage; /* Bytes used after initialization. */
int always_show_logo; /* Show logo even for non-stdout logging. */
int in_exec; /* Are we inside EXEC? */
int busy_module_yield_flags; /* Are we inside a busy module? (triggered by RM_Yield). see BUSY_MODULE_YIELD_ flags. */
const char *busy_module_yield_reply; /* When non-null, we are inside RM_Yield. */
char *ignore_warnings; /* Config: warnings that should be ignored. */
int client_pause_in_transaction; /* Was a client pause executed during this Exec? */
int thp_enabled; /* If true, THP is enabled. */
size_t page_size; /* The page size of OS. */
/* Modules */
dict *moduleapi; /* Exported core APIs dictionary for modules. */
dict *sharedapi; /* Like moduleapi but containing the APIs that
modules share with each other. */
dict *module_configs_queue; /* Dict that stores module configurations from .conf file until after modules are loaded during startup or arguments to loadex. */
list *loadmodule_queue; /* List of modules to load at startup. */
int module_pipe[2]; /* Pipe used to awake the event loop by module threads. */
pid_t child_pid; /* PID of current child */
int child_type; /* Type of current child */
/* Networking */
int port; /* TCP listening port */
int tls_port; /* TLS listening port */
int tcp_backlog; /* TCP listen() backlog */
char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
char *bind_source_addr; /* Source address to bind on for outgoing connections */
char *unixsocket; /* UNIX socket path */
unsigned int unixsocketperm; /* UNIX socket permission (see mode_t) */
connListener listeners[CONN_TYPE_MAX]; /* TCP/Unix/TLS even more types */
uint32_t socket_mark_id; /* ID for listen socket marking */
connListener clistener; /* Cluster bus listener */
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *clients_pending_write; /* There is to write or install handler. */
list *clients_pending_read; /* Client has pending read socket buffers. */
list *slaves, *monitors; /* List of slaves and MONITORs */
client *current_client; /* The client that triggered the command execution (External or AOF). */
client *executing_client; /* The client executing the current command (possibly script or module). */
#ifdef LOG_REQ_RES
char *req_res_logfile; /* Path of log file for logging all requests and their replies. If NULL, no logging will be performed */
unsigned int client_default_resp; /* NOTE(review): presumably the default RESP protocol version for clients; only present when LOG_REQ_RES is defined - confirm. */
#endif
/* Stuff for client mem eviction */
clientMemUsageBucket* client_mem_usage_buckets;
rax *clients_timeout_table; /* Radix tree for blocked clients timeouts. */
int execution_nesting; /* Execution nesting level.
* e.g. call(), async module stuff (timers, events, etc.),
* cron stuff (active expire, eviction) */
rax *clients_index; /* Active clients dictionary by client ID. */
uint32_t paused_actions; /* Bitmask of actions that are currently paused */
list *postponed_clients; /* List of postponed clients */
pause_event client_pause_per_purpose[NUM_PAUSE_PURPOSES]; /* One pause_event slot per pause purpose. */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
int protected_mode; /* Don't accept external connections. */
int io_threads_num; /* Number of IO threads to use. */
int io_threads_do_reads; /* Read and parse from IO threads? */
int io_threads_active; /* Is IO threads currently active? */
long long events_processed_while_blocked; /* processEventsWhileBlocked() */
int enable_protected_configs; /* Enable the modification of protected configs, see PROTECTED_ACTION_ALLOWED_* */
int enable_debug_cmd; /* Enable DEBUG commands, see PROTECTED_ACTION_ALLOWED_* */
int enable_module_cmd; /* Enable MODULE commands, see PROTECTED_ACTION_ALLOWED_* */
/* RDB / AOF loading information */
volatile sig_atomic_t loading; /* We are loading data from disk if true */
volatile sig_atomic_t async_loading; /* We are loading data without blocking the db being served */
off_t loading_total_bytes;
off_t loading_rdb_used_mem;
off_t loading_loaded_bytes;
time_t loading_start_time;
off_t loading_process_events_interval_bytes;
/* Fields used only for stats */
time_t stat_starttime; /* Server start time */
long long stat_numcommands; /* Number of processed commands */
long long stat_numconnections; /* Number of connections received */
long long stat_expiredkeys; /* Number of expired keys */
double stat_expired_stale_perc; /* Percentage of keys probably expired */
long long stat_expired_time_cap_reached_count; /* Early expire cycle stops.*/
long long stat_expire_cycle_time_used; /* Cumulative microseconds used. */
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
long long stat_evictedclients; /* Number of evicted clients */
long long stat_total_eviction_exceeded_time; /* Total time over the memory limit, unit us */
monotime stat_last_eviction_exceeded_time; /* Timestamp of current eviction start, unit us */
long long stat_keyspace_hits; /* Number of successful lookups of keys */
long long stat_keyspace_misses; /* Number of failed lookups of keys */
long long stat_active_defrag_hits; /* number of allocations moved */
long long stat_active_defrag_misses; /* number of allocations scanned but not moved */
long long stat_active_defrag_key_hits; /* number of keys with moved allocations */
long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */
long long stat_active_defrag_scanned; /* number of dictEntries scanned */
long long stat_total_active_defrag_time; /* Total time memory fragmentation over the limit, unit us */
monotime stat_last_active_defrag_time; /* Timestamp of current active defrag start */
size_t stat_peak_memory; /* Max used memory record */
long long stat_aof_rewrites; /* number of aof file rewrites performed */
long long stat_aofrw_consecutive_failures; /* The number of consecutive failures of aofrw */
long long stat_rdb_saves; /* number of rdb saves performed */
long long stat_fork_time; /* Time needed to perform latest fork() */
double stat_fork_rate; /* Fork rate in GB/sec. */
long long stat_total_forks; /* Total count of fork. */
long long stat_rejected_conn; /* Clients rejected because of maxclients */
long long stat_sync_full; /* Number of full resyncs with slaves. */
long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
list *slowlog; /* SLOWLOG list of commands */
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
redisAtomic long long stat_net_repl_input_bytes; /* Bytes read during replication, added to stat_net_input_bytes in 'info'. */
redisAtomic long long stat_net_repl_output_bytes; /* Bytes written during replication, added to stat_net_output_bytes in 'info'. */
size_t stat_current_cow_peak; /* Peak size of copy on write bytes. */
size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
size_t stat_current_save_keys_processed; /* Processed keys while child is active. */
size_t stat_current_save_keys_total; /* Number of keys when child started. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */
size_t stat_module_cow_bytes; /* Copy on write bytes during module fork. */
double stat_module_progress; /* Module save progress. */
size_t stat_clients_type_memory[CLIENT_TYPE_COUNT];/* Mem usage by type */
size_t stat_cluster_links_memory; /* Mem usage by cluster links */
long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
long long stat_dump_payload_sanitizations; /* Number of deep dump payload integrity validations. */
long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */
long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */
redisAtomic long long stat_total_reads_processed; /* Total number of read events processed */
redisAtomic long long stat_total_writes_processed; /* Total number of write events processed */
long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */
long long stat_client_outbuf_limit_disconnections; /* Total number of clients reached output buf length limit */
/* The following structure array is used to track instantaneous metrics,
* like number of operations per second, network traffic. There is one
* entry per metric (STATS_METRIC_COUNT), each keeping a window of
* STATS_METRIC_SAMPLES samples. */
struct {
long long last_sample_base; /* The divisor of last sample window */
long long last_sample_value; /* The dividend of last sample window */
long long samples[STATS_METRIC_SAMPLES];
int idx;
} inst_metric[STATS_METRIC_COUNT];
long long stat_reply_buffer_shrinks; /* Total number of output buffer shrinks */
long long stat_reply_buffer_expands; /* Total number of output buffer expands */
monotime el_start;
/* The following two are used to record the max number of commands executed in one eventloop.
* Note that commands in transactions are also counted. */
long long el_cmd_cnt_start;
long long el_cmd_cnt_max;
/* The sum of active-expire, active-defrag and all other tasks done by cron and beforeSleep,
but excluding read, write and AOF, which are counted by other sets of metrics. */
monotime el_cron_duration;
durationStats duration_stats[EL_DURATION_TYPE_NUM];
/* Configuration */
int verbosity; /* Loglevel in redis.conf */
int maxidletime; /* Client timeout in seconds */
int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
int active_expire_enabled; /* Can be disabled for testing purposes. */
int active_expire_effort; /* From 1 (default) to 10, active effort. */
int lazy_expire_disabled; /* If > 0, don't trigger lazy expire */
int active_defrag_enabled;
int sanitize_dump_payload; /* Enables deep sanitization for ziplist and listpack in RDB and RESTORE. */
int skip_checksum_validation; /* Disable checksum validation for RDB and RESTORE payload. */
int jemalloc_bg_thread; /* Enable jemalloc background thread */
size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */
int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */
unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */
size_t client_max_querybuf_len; /* Limit for client query buffer length */
int dbnum; /* Total number of configured DBs */
int supervised; /* 1 if supervised, 0 otherwise. */
int supervised_mode; /* See SUPERVISED_* */
int daemonize; /* True if running as a daemon */
int set_proc_title; /* True if change proc title */
char *proc_title_template; /* Process title template format */
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
int pause_cron; /* Don't run cron tasks (debug) */
int latency_tracking_enabled; /* 1 if extended latency tracking is enabled, 0 otherwise. */
double *latency_tracking_info_percentiles; /* Extended latency tracking info output percentile list configuration. */
int latency_tracking_info_percentiles_len;
/* AOF persistence */
int aof_enabled; /* AOF configuration */
int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */
int aof_fsync; /* Kind of fsync() policy */
char *aof_filename; /* Basename of the AOF file and manifest file */
char *aof_dirname; /* Name of the AOF directory */
int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */
off_t aof_last_incr_size; /* The size of the latest incr AOF. */
off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk.
* Compare with the aof_last_incr_size. */
int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
sds aof_buf; /* AOF buffer, written before entering the event loop */
int aof_fd; /* File descriptor of currently selected AOF file */
int aof_selected_db; /* Currently selected DB in AOF */
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
time_t aof_last_fsync; /* UNIX time of last fsync() */
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
time_t aof_cur_timestamp; /* Current record timestamp in AOF */
int aof_timestamp_enabled; /* Enable record timestamp in AOF */
int aof_lastbgrewrite_status; /* C_OK or C_ERR */
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
int aof_last_write_status; /* C_OK or C_ERR */
int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
int aof_use_rdb_preamble; /* Specify base AOF to use RDB encoding on AOF rewrites. */
redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */
redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */
aofManifest *aof_manifest; /* Used to track AOFs. */
int aof_disable_auto_gc; /* Disable automatic deletion of HISTORY type AOFs?
Default no. (for testing). */
/* RDB persistence */
long long dirty; /* Changes to DB from the last save */
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
long long rdb_last_load_keys_expired; /* number of expired keys when loading RDB */
long long rdb_last_load_keys_loaded; /* number of loaded keys when loading RDB */
struct saveparam *saveparams; /* Save points array for RDB */
int saveparamslen; /* Number of saving points */
char *rdb_filename; /* Name of RDB file */
int rdb_compression; /* Use compression in RDB? */
int rdb_checksum; /* Use RDB checksum? */
int rdb_del_sync_files; /* Remove RDB files used only for SYNC if
the instance does not use persistence. */
time_t lastsave; /* Unix time of last successful save */
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
time_t rdb_save_time_last; /* Time used by last RDB save run. */
time_t rdb_save_time_start; /* Current RDB save start time. */
int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
int rdb_child_type; /* Type of save by active child. */
int lastbgsave_status; /* C_OK or C_ERR */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
int rdb_pipe_read; /* RDB pipe used to transfer the rdb data */
/* to the parent process in diskless repl. */
int rdb_child_exit_pipe; /* Used by the diskless parent allow child exit. */
connection **rdb_pipe_conns; /* Connections which are currently the */
int rdb_pipe_numconns; /* target of diskless rdb fork child. */
int rdb_pipe_numconns_writing; /* Number of rdb conns with pending writes. */
char *rdb_pipe_buff; /* In diskless replication, this buffer holds data */
int rdb_pipe_bufflen; /* that was read from the rdb pipe. */
int rdb_key_save_delay; /* Delay in microseconds between keys while
* writing aof or rdb. (for testing). negative
* value means fractions of microseconds (on average). */
int key_load_delay; /* Delay in microseconds between keys while
* loading aof or rdb. (for testing). negative
* value means fractions of microseconds (on average). */
/* Pipe and data structures for child -> parent info sharing. */
int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
int child_info_nread; /* Num of bytes of the last read from pipe */
/* Propagation of commands in AOF / replication */
redisOpArray also_propagate; /* Additional command to propagate. */
int replication_allowed; /* Are we allowed to replicate? */
/* Logging */
char *logfile; /* Path of log file */
int syslog_enabled; /* Is syslog enabled? */
char *syslog_ident; /* Syslog ident */
int syslog_facility; /* Syslog facility */
int crashlog_enabled; /* Enable signal handler for crashlog.
* disable for clean core dumps. */
int memcheck_enabled; /* Enable memory check on crash. */
int use_exit_on_panic; /* Use exit() on panic and assert rather than
* abort(). useful for Valgrind. */
/* Shutdown */
int shutdown_timeout; /* Graceful shutdown time limit in seconds. */
int shutdown_on_sigint; /* Shutdown flags configured for SIGINT. */
int shutdown_on_sigterm; /* Shutdown flags configured for SIGTERM. */
/* Replication (master) */
char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */
char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
long long master_repl_offset; /* My current replication offset */
long long second_replid_offset; /* Accept offsets up to this for replid2. */
redisAtomic long long fsynced_reploff_pending;/* Largest replication offset to
* potentially have been fsynced, applied to
fsynced_reploff only when AOF state is AOF_ON
(not during the initial rewrite) */
long long fsynced_reploff; /* Largest replication offset that has been confirmed to be fsynced */
int slaveseldb; /* Last SELECTed DB in replication output */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
replBacklog *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time.
Only valid if server.slaves len is 0. */
int repl_min_slaves_to_write; /* Min number of slaves to write. */
int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */
int repl_diskless_load; /* Slave parse RDB directly from the socket.
* see REPL_DISKLESS_LOAD_* enum */
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
int repl_diskless_sync_max_replicas;/* Max replicas for diskless repl BGSAVE
* delay (start sooner if they all connect). */
size_t repl_buffer_mem; /* The memory of replication buffer. */
list *repl_buffer_blocks; /* Replication buffers blocks list
* (serving replica clients and repl backlog) */
/* Replication (slave) */
char *masteruser; /* AUTH with this user and masterauth with master */
sds masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
int repl_timeout; /* Timeout after N seconds of master idle */
client *master; /* Client that is master for this slave */
client *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
connection *repl_transfer_s; /* Slave -> Master SYNC connection */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
int repl_serve_stale_data; /* Serve stale data when link is down? */
int repl_slave_ro; /* Slave is read only? */
int repl_slave_ignore_maxmemory; /* If true slaves do not evict. */
time_t repl_down_since; /* Unix time at which link with master went down */
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
int slave_priority; /* Reported in INFO and used by Sentinel. */
int replica_announced; /* If true, replica is announced by Sentinel */
int slave_announce_port; /* Give the master this listening port. */
char *slave_announce_ip; /* Give the master this ip address. */
int propagation_error_behavior; /* Configures the behavior of the replica
* when it receives an error on the replication stream */
int repl_ignore_disk_write_error; /* Configures whether replicas panic when unable to
* persist writes to AOF. */
/* The following two fields are where we store master PSYNC replid/offset
* while the PSYNC is in progress. At the end we'll copy the fields into
* the server->master client structure. */
char master_replid[CONFIG_RUN_ID_SIZE+1]; /* Master PSYNC runid. */
long long master_initial_offset; /* Master PSYNC offset. */
int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
/* Synchronous replication. */
list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF. */
int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
/* Limits */
unsigned int maxclients; /* Max number of simultaneous clients */
unsigned long long maxmemory; /* Max number of memory bytes to use */
ssize_t maxmemory_clients; /* Memory limit for total client buffers */
int maxmemory_policy; /* Policy for key eviction */
int maxmemory_samples; /* Precision of random sampling */
int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */
int lfu_log_factor; /* LFU logarithmic counter factor. */
int lfu_decay_time; /* LFU counter decay factor. */
long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
int oom_score_adj_values[CONFIG_OOM_COUNT]; /* Linux oom_score_adj configuration */
int oom_score_adj; /* If true, oom_score_adj is managed */
int disable_thp; /* If true, disable THP by syscall */
/* Blocked clients */
unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
unsigned int blocked_clients_by_type[BLOCKED_NUM];
list *unblocked_clients; /* list of clients to unblock before next loop */
list *ready_keys; /* List of readyList structures for BLPOP & co */
/* Client side caching. */
unsigned int tracking_clients; /* # of clients with tracking enabled.*/
size_t tracking_table_max_keys; /* Max number of keys in tracking table. */
list *tracking_pending_keys; /* tracking invalidation keys pending to flush */
list *pending_push_messages; /* pending publish or other push messages to flush */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
int sort_store;
/* Zip structure config, see redis.conf for more information */
size_t hash_max_listpack_entries;
size_t hash_max_listpack_value;
size_t set_max_intset_entries;
size_t set_max_listpack_entries;
size_t set_max_listpack_value;
size_t zset_max_listpack_entries;
size_t zset_max_listpack_value;
size_t hll_sparse_max_bytes;
size_t stream_node_max_bytes;
long long stream_node_max_entries;
/* List parameters */
int list_max_listpack_size;
int list_compress_depth;
/* time cache */
redisAtomic time_t unixtime; /* Unix time sampled every cron cycle. */
time_t timezone; /* Cached timezone. As set by tzset(). */
int daylight_active; /* Currently in daylight saving time. */
mstime_t mstime; /* 'unixtime' in milliseconds. */
ustime_t ustime; /* 'unixtime' in microseconds. */
mstime_t cmd_time_snapshot; /* Time snapshot of the root execution nesting. */
size_t blocking_op_nesting; /* Nesting level of blocking operation, used to reset blocked_last_cron. */
long long blocked_last_cron; /* Indicate the mstime of the last time we did cron jobs from a blocking operation */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
dict *pubsub_patterns; /* A dict of pubsub_patterns */
int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
xor of NOTIFY_... flags. */
dict *pubsubshard_channels; /* Map shard channels to list of subscribed clients */
/* Cluster */
int cluster_enabled; /* Is cluster enabled? */
int cluster_port; /* Set the cluster port for a node. */
mstime_t cluster_node_timeout; /* Cluster node timeout. */
mstime_t cluster_ping_interval; /* A debug configuration for setting how often cluster nodes send ping messages. */
char *cluster_configfile; /* Cluster auto-generated config file name. */
struct clusterState *cluster; /* State of the cluster */
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */
int cluster_slave_validity_factor; /* Slave max data age for failover. */
int cluster_require_full_coverage; /* If true, put the cluster down if
there is at least an uncovered slot.*/
int cluster_slave_no_failover; /* Prevent slave from starting a failover
if the master is in failure state. */
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
char *cluster_announce_hostname; /* hostname to announce on cluster bus. */
char *cluster_announce_human_nodename; /* Human readable node name assigned to a node. */
int cluster_preferred_endpoint_type; /* Use the announced hostname when available. */
int cluster_announce_port; /* base port to announce on cluster bus. */
int cluster_announce_tls_port; /* TLS port to announce on cluster bus. */
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
int cluster_module_flags; /* Set of flags that Redis modules are able
to set in order to suppress certain
native Redis Cluster features. Check the
REDISMODULE_CLUSTER_FLAG_*. */
int cluster_allow_reads_when_down; /* Are reads allowed when the cluster
is down? */
int cluster_config_file_lock_fd; /* cluster config fd, will be flocked. */
unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */
int cluster_drop_packet_filter; /* Debug config that allows tactically
* dropping packets of a specific type */
/* Scripting */
mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */
int pre_command_oom_state; /* OOM before command (script?) was started */
int script_disable_deny_script; /* Allow running commands marked "no-script" inside a script. */
/* Lazy free */
int lazyfree_lazy_eviction;
int lazyfree_lazy_expire;
int lazyfree_lazy_server_del;
int lazyfree_lazy_user_del;
int lazyfree_lazy_user_flush;
/* Latency monitor */
long long latency_monitor_threshold;
dict *latency_events;
/* ACLs */
char *acl_filename; /* ACL Users file. NULL if not configured. */
unsigned long acllog_max_len; /* Maximum length of the ACL LOG list. */
sds requirepass; /* Remember the cleartext password set with
the old "requirepass" directive for
backward compatibility with Redis <= 5. */
int acl_pubsub_default; /* Default ACL pub/sub channels flag */
aclInfo acl_info; /* ACL info */
/* Assert & bug reporting */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
/* System hardware info */
size_t system_memory_size; /* Total memory in system as reported by OS */
/* TLS Configuration */
int tls_cluster;
int tls_replication;
int tls_auth_clients;
redisTLSContextConfig tls_ctx_config;
/* cpu affinity */
char *server_cpulist; /* cpu affinity list of redis server main/io thread. */
char *bio_cpulist; /* cpu affinity list of bio thread. */
char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */
char *bgsave_cpulist; /* cpu affinity list of bgsave process. */
/* Sentinel config */
struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */
/* Coordinate failover info */
mstime_t failover_end_time; /* Deadline for failover command. */
int force_failover; /* If true then failover will be forced at the
* deadline, otherwise failover is aborted. */
char *target_replica_host; /* Failover target host. If null during a
* failover then any replica can be used. */
int target_replica_port; /* Failover target port */
int failover_state; /* Failover state */
int cluster_allow_pubsubshard_when_down; /* Is pubsubshard allowed when the cluster
is down, doesn't affect pubsub global. */
long reply_buffer_peak_reset_time; /* The amount of time (in milliseconds) to wait between reply buffer peak resets */
int reply_buffer_resizing_enabled; /* Is reply buffer resizing enabled (1 by default) */
/* Local environment */
char *locale_collate;
};
redisServer的字段比较多,其中最主要的是 `redisDb *db`:它指向一个数组,数组中的每一项保存一个数据库的存储信息;dbnum 表示数据库的个数。
数据库结构
每个数据库是由redisDb结构组成的。
/* Redis database representation. There are multiple databases identified
 * by integers from 0 (the default database) up to the max configured
 * database. The database number is the 'id' field in the structure.
 *
 * Each database keeps its keyspace and its key expiration times in two
 * separate dictionaries ('dict' and 'expires'). */
typedef struct redisDb {
dict *dict; /* The keyspace for this DB */
dict *expires; /* Timeout of keys with a timeout set */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
dict *blocking_keys_unblock_on_nokey; /* Keys with clients waiting for
* data, and should be unblocked if key is deleted (XREADGROUP).
* This is a subset of blocking_keys. */
dict *ready_keys; /* Blocked keys that received a PUSH */
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
int id; /* Database ID */
long long avg_ttl; /* Average TTL, just for stats */
unsigned long expires_cursor; /* Cursor of the active expire cycle. */
list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
clusterSlotToKeyMapping *slots_to_keys; /* Array of slots to keys. Only used in cluster mode (db 0). */
} redisDb;
redisDb主要由键值空间和过期集合两个字典组成。其中dict包含所有的键值对信息,expires包含键的过期信息。
过期键删除
如果一个键已经过期,它是怎么被删除的呢?删除策略一共有三种:定时删除、惰性删除和定期删除。
定时删除
在设置键过期的同时,创建一个timer,让timer在键过期时立即执行对键的删除。
- 优点:对内存友好
- 缺点:对CPU不友好
惰性删除
键过期后不做任何处理,但是当从键空间中获取键时,都检查键是否过期,如果过期的话,就删除键,否则就正常返回。
- 优点:对CPU友好
- 缺点:对内存不友好
定期删除
每隔一段时间对数据库进行一次检查,删除里面的过期键。至于每次遍历多少个库,遍历多少个键,则由配置决定。
定期删除是定时删除和惰性删除两种策略的一种整合折中。
Redis过期键删除策略
Redis服务器实际上使用的是惰性删除和定期删除两种策略的结合。通过两种策略的配合,可以很好地在合理利用CPU时间和避免浪费内存空间之间取得平衡。
惰性删除实现
惰性删除主要由expireIfNeeded函数实现,所有读写数据库的Redis命令在执行之前都会调用expireIfNeeded函数对输入键进行检查:如果键已经过期,就将其从数据库中删除。
/* This function is called when we are going to perform some operation
* in a given key, but such key may be already logically expired even if
* it still exists in the database. The main way this function is called
* is via lookupKey*() family of functions.
*
* The behavior of the function depends on the replication role of the
* instance, because by default replicas do not delete expired keys. They
* wait for DELs from the master for consistency matters. However even
* replicas will try to have a coherent return value for the function,
* so that read commands executed in the replica side will be able to
* behave like if the key is expired even if still present (because the
* master has yet to propagate the DEL).
*
* In masters as a side effect of finding a key which is expired, such
* key will be evicted from the database. Also this may trigger the
* propagation of a DEL/UNLINK command in AOF / replication stream.
*
* On replicas, this function does not delete expired keys by default, but
* it still returns 1 if the key is logically expired. To force deletion
* of logically expired keys even on replicas, use the EXPIRE_FORCE_DELETE_EXPIRED
* flag. Note though that if the current client is executing
* replicated commands from the master, keys are never considered expired.
*
* On the other hand, if you just want expiration check, but need to avoid
* the actual key deletion and propagation of the deletion, use the
* EXPIRE_AVOID_DELETE_EXPIRED flag.
*
* The return value of the function is 0 if the key is still valid,
* otherwise the function returns 1 if the key is expired. */
int expireIfNeeded(redisDb *db, robj *key, int flags) {
    /* Nothing to do when lazy expiration is disabled or when the key is
     * not logically expired yet. */
    if (server.lazy_expire_disabled || !keyIsExpired(db,key)) return 0;

    /* Replica context: expiration is driven by the master, which sends
     * synthesized DEL operations for expired keys, so normally we only
     * report the logical state and leave the key where it is.
     *
     * - Commands replicated from the master never see a key as expired.
     * - Any other caller is told the key is expired (1) without deleting
     *   it, unless EXPIRE_FORCE_DELETE_EXPIRED asks for a real deletion
     *   (e.g. write operations performed on writable replicas). */
    if (server.masterhost != NULL) {
        if (server.current_client &&
            (server.current_client->flags & CLIENT_MASTER))
        {
            return 0;
        }
        if ((flags & EXPIRE_FORCE_DELETE_EXPIRED) == 0) return 1;
    }

    /* Report the key as expired without touching it when either:
     * - the caller explicitly asked for a check-only lookup, even on a
     *   master (EXPIRE_AVOID_DELETE_EXPIRED), or
     * - the 'expire' action is currently paused. Typically, at the end of
     *   the pause the key is properly expired OR we have failed over and
     *   the new primary sends us the expire.
     * The pause check is evaluated only when the flag is not set. */
    if ((flags & EXPIRE_AVOID_DELETE_EXPIRED) ||
        isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE))
    {
        return 1;
    }

    /* Delete the key. A static key object has to be converted to a heap
     * allocated one before deletion; that temporary copy is released
     * right after. */
    if (key->refcount == OBJ_STATIC_REFCOUNT) {
        robj *heap_key = createStringObject(key->ptr, sdslen(key->ptr));
        deleteExpiredKeyAndPropagate(db,heap_key);
        decrRefCount(heap_key);
    } else {
        deleteExpiredKeyAndPropagate(db,key);
    }
    return 1;
}
定期删除实现
定期删除由activeExpireCycle函数实现。每当Redis服务器的周期性操作函数serverCron执行时,activeExpireCycle函数就会被调用。它在规定的时间内,分多次遍历服务器中的各个数据库,从数据库的expires字典中随机检查一部分键的过期时间,并删除其中的过期键。
/* Active (periodic) expiration: sample keys from the 'expires' dictionary of
 * the databases and let the scan callback deal with the expired ones, within
 * a bounded time budget.
 *
 * 'type' selects the kind of cycle. With ACTIVE_EXPIRE_CYCLE_FAST the cycle
 * may be skipped entirely (see the checks below) and its time budget is
 * 'config_cycle_fast_duration' microseconds; for any other value the budget
 * is derived from 'config_cycle_slow_time_perc' and server.hz.
 *
 * The function keeps static state (next DB to visit, whether the previous
 * call hit the time limit, start time of the last fast cycle) so that work
 * continues incrementally across calls. Before returning it updates
 * server.stat_expire_cycle_time_used and server.stat_expired_stale_perc. */
void activeExpireCycle(int type) {
/* Adjust the running parameters according to the configured expire
 * effort. The default effort is 1, and the maximum configurable effort
 * is 10. */
unsigned long
effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort,
config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC +
2*effort,
config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE-
effort;
/* This function has some global state in order to continue the work
 * incrementally across calls. */
static unsigned int current_db = 0; /* Next DB to test. */
static int timelimit_exit = 0; /* Time limit hit in previous call? */
static long long last_fast_cycle = 0; /* When last fast cycle ran. */
int j, iteration = 0;
int dbs_per_call = CRON_DBS_PER_CALL;
long long start = ustime(), timelimit, elapsed;
/* If 'expire' action is paused, for whatever reason, then don't expire any key.
 * Typically, at the end of the pause we will properly expire the key OR we
 * will have failed over and the new primary will send us the expire. */
if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return;
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
/* Don't start a fast cycle if the previous cycle did not exit
 * for time limit, unless the percentage of estimated stale keys is
 * too high. Also never start a new fast cycle until at least twice
 * the fast cycle duration has elapsed since the previous one
 * started. */
if (!timelimit_exit &&
server.stat_expired_stale_perc < config_cycle_acceptable_stale)
return;
if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
return;
last_fast_cycle = start;
}
/* We usually should test CRON_DBS_PER_CALL per iteration, with
 * two exceptions:
 *
 * 1) Don't test more DBs than we have.
 * 2) If last time we hit the time limit, we want to scan all DBs
 * in this iteration, as there is work to do in some DB and we don't want
 * expired keys to use memory for too much time. */
if (dbs_per_call > server.dbnum || timelimit_exit)
dbs_per_call = server.dbnum;
/* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
 * time per iteration. Since this function gets called with a frequency of
 * server.hz times per second, the following is the max amount of
 * microseconds we can spend in this function. */
timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
timelimit_exit = 0;
if (timelimit <= 0) timelimit = 1;
/* A fast cycle overrides the budget computed above. */
if (type == ACTIVE_EXPIRE_CYCLE_FAST)
timelimit = config_cycle_fast_duration; /* in microseconds. */
/* Accumulate some global stats as we expire keys, to have some idea
 * about the number of keys that are already logically expired, but still
 * existing inside the database. */
long total_sampled = 0;
long total_expired = 0;
/* Try to smoke-out bugs (server.also_propagate should be empty here) */
serverAssert(server.also_propagate.numops == 0);
for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
/* Scan callback data including expired and checked count per iteration. */
expireScanData data;
redisDb *db = server.db+(current_db % server.dbnum);
data.db = db;
/* Increment the DB now so we are sure if we run out of time
 * in the current DB we'll restart from the next. This allows to
 * distribute the time evenly across DBs. */
current_db++;
/* Continue to expire if at the end of the cycle there are still
 * a big percentage of keys to expire, compared to the number of keys
 * we scanned. The percentage, stored in config_cycle_acceptable_stale
 * is not fixed, but depends on the Redis configured "expire effort". */
do {
unsigned long num, slots;
iteration++;
/* If there is nothing to expire try next DB ASAP. */
if ((num = dictSize(db->expires)) == 0) {
db->avg_ttl = 0;
break;
}
slots = dictSlots(db->expires);
data.now = mstime();
/* When there are less than 1% filled slots, sampling the key
 * space is expensive, so stop here waiting for better times...
 * The dictionary will be resized asap. */
if (slots > DICT_HT_INITIAL_SIZE &&
(num*100/slots < 1)) break;
/* The main collection cycle. Scan through keys among keys
 * with an expire set, checking for expired ones. */
data.sampled = 0;
data.expired = 0;
data.ttl_sum = 0;
data.ttl_samples = 0;
/* Sample at most 'config_keys_per_loop' keys per iteration. */
if (num > config_keys_per_loop)
num = config_keys_per_loop;
/* The expires dictionary is walked incrementally with dictScan(),
 * resuming from db->expires_cursor; expireScanCallback receives
 * 'data' and is expected to update its counters (the callback is
 * defined elsewhere).
 *
 * Note that certain places of the hash table may be empty,
 * so we want also a stop condition about the number of
 * scan steps performed: 'checked_buckets' is incremented once per
 * dictScan() call and capped at 20 times the number of keys we
 * want to sample. Scanning empty buckets is expected to be cheap
 * compared to visiting keys. */
long max_buckets = num*20;
long checked_buckets = 0;
while (data.sampled < num && checked_buckets < max_buckets) {
db->expires_cursor = dictScan(db->expires, db->expires_cursor,
expireScanCallback, &data);
checked_buckets++;
}
total_expired += data.expired;
total_sampled += data.sampled;
/* Update the average TTL stats for this database. */
if (data.ttl_samples) {
long long avg_ttl = data.ttl_sum / data.ttl_samples;
/* Do a simple running average with a few samples.
 * We just use the current estimate with a weight of 2%
 * and the previous estimate with a weight of 98%. */
if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
}
/* We can't block forever here even if there are many keys to
 * expire. So once 'timelimit' microseconds have elapsed, return to
 * the caller waiting for the other active expire cycle. */
if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
elapsed = ustime()-start;
if (elapsed > timelimit) {
timelimit_exit = 1;
server.stat_expired_time_cap_reached_count++;
break;
}
}
/* We don't repeat the cycle for the current database if there are
 * an acceptable amount of stale keys (logically expired but yet
 * not reclaimed). */
} while (data.sampled == 0 ||
(data.expired * 100 / data.sampled) > config_cycle_acceptable_stale);
}
/* Account the time spent in this call and report it to the latency
 * monitor (converted from microseconds to milliseconds). */
elapsed = ustime()-start;
server.stat_expire_cycle_time_used += elapsed;
latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
/* Update our estimate of keys existing but yet to be expired.
 * Running average with this sample accounting for 5%. */
double current_perc;
if (total_sampled) {
current_perc = (double)total_expired/total_sampled;
} else
current_perc = 0;
server.stat_expired_stale_perc = (current_perc*0.05)+
(server.stat_expired_stale_perc*0.95);
}
内存淘汰
淘汰策略
当Redis的运行内存超出设置的最大内存,将开启内存淘汰,也就是对键进行删除。
Redis内存淘汰策略主要分为8种。其中noeviction为默认淘汰策略
- noeviction:不淘汰任何键,再有新键写入时直接报错。
- volatile-random: 在设置了过期时间的键中,进行随机删除
- volatile-ttl:在设置了过期时间的键中,优先删除剩余存活时间(TTL)最短、即最早过期的键
- volatile-lru:在设置了过期时间的键中,优先删除最久未使用的键
- volatile-lfu:在设置了过期时间的键中,优先删除最少使用的键
- allkeys-random: 所有键中,进行随机删除
- allkeys-lru: 所有键中,优先删除最久未使用的键
- allkeys-lfu:所有键中,优先删除最少使用的键
LRU算法和LFU算法都依赖redisObject中的lru字段,但是两种算法对于数据存储有些差异。
/* Object header. The 'lru' bit-field is shared by the LRU and the LFU
 * eviction policies, which interpret its bits differently (see the field
 * comment below). */
struct redisObject {
unsigned type:4; /* 4-bit type tag (values are defined elsewhere). */
unsigned encoding:4; /* 4-bit encoding tag (values are defined elsewhere). */
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
 * LFU data (least significant 8 bits frequency
 * and most significant 16 bits access time). */
int refcount; /* Reference count. NOTE(review): the special value
 * OBJ_STATIC_REFCOUNT appears to mark static
 * (non-heap) objects -- confirm against its definition. */
void *ptr; /* Pointer to the object payload. */
};
LRU算法
LRU(Least Recently Used)即最近最少使用。传统的LRU算法是基于链表实现的,最新操作的键会移动到表头,当进行淘汰时,只需要从表尾进行即可。
但是Redis并没有使用传统的LRU算法,因为那样会有两个问题。
- 使用链表维护所有的缓存数据,会带来额外的内存开销
- 如果有大量的键被访问,会导致大量的链表移动操作,比较耗时,影响redis性能
Redis实现了一种近似LRU算法,使用随机采样的思想进行键值淘汰:它随机选出若干个键(默认5个,可通过maxmemory-samples配置),然后淘汰其中最久未使用的键。
Redis对象中的Lru的24位bits用于储存键最近被访问的时间戳,可以根据时间戳计算需要淘汰的键。
LFU算法
LFU(Least Frequently Used)即最不经常使用。LFU是根据键被访问的频率来进行数据淘汰的。其核心思想是如果某个键过去被访问多次,未来被访问的频率也会更高。
在LFU模式下,redisObject的lru字段被拆分为两部分:高16位的ldt和低8位的logc。
- ldt:用于记录键被访问的时间戳
- logc:用于记录键被访问的频次,值越小越容易被淘汰
当某个键被访问时logc会经历两个操作
- 按照上次被访问的时间戳计算衰减,lfu-decay-time控制衰减速度。
- 根据本次访问对值进行增加,lfu-log-factor控制增长速度。