nginx的合并回源

最近使用nginx做缓存,发现当大量客户端同时访问一个尚未缓存的文件时,回源的流量非常大,在源站上查看日志也确实能看到并发的请求。这种情况就需要改成合并回源:当cache内没有数据时,只允许1个请求去源站请求数据并写到本地cache,其余请求等待该缓存生成。nginx从1.1.12开始原生支持合并回源,主要有2个配置项(http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_cache_lock):

# Let only one request at a time populate a cache element that is not cached yet;
# other requests for the same key wait instead of hitting the origin.
proxy_cache_lock on;
# Timeout for the cache lock: when it expires, the waiting request is passed
# to the origin, but its response is not cached.
proxy_cache_lock_timeout 15s;
# Keep serving a stale cached copy when the origin errors out, times out,
# sends an invalid header or answers 500/502/503/504/404.
# "updating" additionally serves the stale copy while one request refreshes an
# expired entry; proxy_cache_lock by itself only covers entries that are not
# cached at all, so without it every expiry causes a burst of origin requests.
proxy_cache_use_stale  error timeout invalid_header updating http_500 http_502 http_503 http_504 http_404;

需要注意的是,缓存数据是根据自己设置的proxy_cache_key来识别的,不要把非必要的变量放进proxy_cache_key,否则同一个URL会产生多份不同的cache。
附上完整配置文件:

# Worker processes run as user "app", group "root".
user                    app root;
# One worker per CPU core, each bound to its own core automatically.
worker_processes            auto;
worker_cpu_affinity         auto;
# Upper limit on open file descriptors per worker process.
worker_rlimit_nofile        100000;

# File that stores the master process ID.
pid                         /home/app/webserver/logs/nginx.pid;
# Messages of level "warn" and above are piped to cronolog.
# NOTE(review): the "pipe:" target is not stock nginx syntax (looks like Tengine) - confirm the build.
# Presumably cronolog splits the stream into dated files following the %Y/%m/%Y-%m-%d template.
error_log                   "pipe:/opt//install/cronolog/sbin/cronolog /home/app/webserver/logs/cronolog/%Y/%m/%Y-%m-%d-error_log" warn;
  
events {
    # Maximum number of simultaneous connections a single worker may hold open.
    worker_connections      20480;
    # Connection processing method: Linux epoll.
    use                     epoll;
}
  
  
http {  
# --- MIME types and static defaults ---
include                 mime.types;
default_type            application/octet-stream;
root                    /home/app/webserver/htdocs;

# --- Connection handling ---
sendfile                on;
tcp_nopush              on;
# Do not reveal the server version in responses and error pages.
server_tokens           off;
# A zero value disables keep-alive connections with clients.
keepalive_timeout       0;
client_header_timeout   1m;
send_timeout            1m;
client_max_body_size    3m;

# --- Logging ---
# Access log line: client, timings, request, status, size, referer, user agent,
# cache status (HIT/MISS/...), upstream address and upstream timings.
# NOTE(review): $request_time_usec is not a stock nginx variable (looks like Tengine) - confirm the build.
log_format              proxyformat    "$remote_addr $request_time_usec $http_x_readtime [$time_local] \"$request_method http://$host$request_uri\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" $upstream_cache_status $upstream_addr $request_time $upstream_response_time";

# NOTE(review): the "pipe:" target is not stock nginx syntax (looks like Tengine) - confirm the build.
access_log              "pipe:/opt//install/cronolog/sbin/cronolog /home/app/webserver/logs/cronolog/%Y/%m/%Y-%m-%d-access_log" proxyformat;
# Do not write "file not found" errors to the error log.
log_not_found           off;

# --- Compression ---
gzip                    on;
# Compress for HTTP/1.0 clients as well.
gzip_http_version       1.0;
gzip_comp_level         6;
# Skip responses shorter than 1024 bytes.
gzip_min_length         1024;
# Compress responses to proxied requests regardless of their headers.
gzip_proxied            any;
# Emit "Vary: Accept-Encoding" so downstream caches keep both variants apart.
gzip_vary               on;
gzip_disable            msie6;
gzip_buffers            96 8k;
gzip_types              text/xml text/plain text/css application/javascript application/x-javascript application/rss+xml;
# NOTE(review): "beacon" is not a stock nginx directive; presumably provided by a custom module - confirm.
beacon                  off;

# --- Proxy cache ---
# Two-level directory layout; zone "yum_zone" keeps keys in 100m of shared memory;
# entries not accessed for 100h are removed; total size is capped at 100G.
proxy_cache_path  /home/app/webserver/data/  levels=1:2   keys_zone=yum_zone:100m inactive=100h max_size=100G;
# Only one request at a time is allowed to populate a new cache element;
# other requests for the same key wait instead of hitting the origin.
proxy_cache_lock on;
# Lock timeout: after 15s a waiting request is passed to the origin,
# but its response is not cached.
proxy_cache_lock_timeout 15s;
# Serve a stale copy when the origin errors out, times out, sends an invalid
# header or answers 500/502/503/504/404.
# "updating" also serves the stale copy while one request refreshes an expired
# entry; proxy_cache_lock only collapses requests for entries that are not
# cached at all, so without it every expiry sends a burst of requests to the origin.
proxy_cache_use_stale  error timeout invalid_header updating http_500 http_502 http_503 http_504 http_404;
  
# Origin server pool that cache misses are fetched from.
upstream yum_source {
    server 192.12.251.20:80;
    # Keep at most 2 idle connections to the origin open in each worker's cache.
    keepalive 2;
}
# Caching reverse proxy in front of the yum_source origin.
server {
    listen              80 default_server;
    server_name         mirrors.xxx.net;
    index               index.html index.htm;

    # HTTP/1.1 with an empty Connection header lets connections to the
    # upstream be kept alive and reused.
    proxy_http_version  1.1;
    proxy_set_header    Connection "";
    # Forward the Host header exactly as the client sent it.
    proxy_set_header    Host      $http_host;
    proxy_intercept_errors on;

    # Cache settings shared by both locations (inherited from server level).
    proxy_cache         yum_zone;
    proxy_cache_min_uses 1;
    proxy_cache_methods GET HEAD;
    # The key deliberately omits $request_method: HEAD requests are converted to
    # GET for caching by default (proxy_cache_convert_head), so including the
    # method would only store and fetch the same response twice per URL.
    proxy_cache_key     $scheme$proxy_host$request_uri;

    # Repository metadata and other frequently changing files: short lifetime.
    location ~* \.(xml|xm_|gz|sh|conf|tar|repo|html|bz2)$ {
        proxy_pass          http://yum_source;
        # proxy_cache_valid and add_header stay inside each location: these
        # directives are inherited only when a level defines none of its own.
        proxy_cache_valid   404      1m;
        proxy_cache_valid   200 302  5m;
        # Expose the cache result and the configured lifetime to clients.
        add_header  Cache-status $upstream_cache_status;
        add_header  Cache-Time  "5m";
    }

    # Everything else: long lifetime.
    location / {
        proxy_pass          http://yum_source;
        proxy_cache_valid   404      1m;
        proxy_cache_valid   200 302  48h;
        add_header  Cache-status $upstream_cache_status;
        add_header  Cache-Time  48h;
    }
}
  
}