rgw: Adding data cache and CDN capabilities

This feature adds a data cache to RGW, using Nginx as the cache server.
It introduces 2 new APIs: an Auth API and a Cache API.

Performance tests using hsbench:
16K objs:

RGW direct access:
Mode: GET, Ops: 3001, MB/s: 46.89, Lat(ms): [ min: 30.4, avg: 33.2, 99%: 34.7, max: 35.2 ]
Nginx access (objs have not been cached)
Mode: GET, Ops: 1363, MB/s: 21.30, Lat(ms): [ min: 63.8, avg: 73.8, 99%: 78.1, max: 86.6 ]
Nginx access (objs have been cached)
Mode: GET, Ops: 2446, MB/s: 38.22, Lat(ms): [ min: 36.9, avg: 41.0, 99%: 43.9, max: 45.9 ]

512K objs:
RGW direct access:
Mode: GET, Ops: 1492, MB/s: 746.00, Lat(ms): [ min: 60.4, avg: 66.7, 99%: 73.5, max: 75.9 ]

Nginx access (objs have not been cached)
Mode: GET, Ops: 1382, MB/s: 691.00, Lat(ms): [ min: 64.5, avg: 72.1, 99%: 77.9, max: 82.8 ]

Nginx access (objs have been cached)
Mode: GET, Ops: 2947, MB/s: 1473.50, Lat(ms): [ min: 3.3, avg: 32.7, 99%: 62.2, max: 72.1 ]

2M objs:
RGW direct access:
Mode: GET, Ops: 613, MB/s: 1226.00, Lat(ms): [ min: 143.6, avg: 162.0, 99%: 180.2, max: 190.1 ]

Nginx access (objs have not been cached)
Mode: GET, Ops: 462, MB/s: 924.00, Lat(ms): [ min: 180.2, avg: 215.0, 99%: 243.2, max: 248.3 ]

Nginx access (objs have been cached)
Mode: GET, Ops: 1392, MB/s: 2784.00, Lat(ms): [ min: 3.0, avg: 5.3, 99%: 18.8, max: 30.2 ]

10M objs:
RGW direct access:
Mode: GET, Ops: 135, MB/s: 1350.00, Lat(ms): [ min: 191.1, avg: 265.8, 99%: 373.1, max: 382.8 ]

Nginx access (objs have not been cached)
Mode: GET, Ops: 120, MB/s: 1200.00, Lat(ms): [ min: 302.1, avg: 428.8, 99%: 561.2, max: 583.7 ]

Nginx access (objs have been cached)
Mode: GET, Ops: 281, MB/s: 2810.00, Lat(ms): [ min: 3.2, avg: 8.3, 99%: 16.9, max: 25.6 ]

gdal_translate of a 4GiB image: gdal_translate -co NUM_THREADS=ALL_CPUS /vsis3/hello/sat.tif

Nginx (have not cached):
real 0m24.714s
user 0m8.692s
sys 0m10.360s

Nginx (have been cached):
real 0m21.070s
user 0m9.140s
sys 0m10.316s

RGW:
real 0m21.859s
user 0m8.850s
sys 0m10.386s

The results show that for objects of 512K and larger the cache improves performance by a factor of two or more.

For small objects, the overhead of sending the auth request makes the cache less efficient.

The cached result in the 10M test is explained by the 25 Gb/s network limit: 2810 MB/s is roughly 22.5 Gb/s, close to line rate (it could otherwise reach more).

With GDAL (an image decoder/encoder that reads over S3 using range requests) the results did not differ much, because GDAL's encoding/decoding is bound to a single CPU.
GDAL was chosen because it exercises Nginx's smart byte-range caching:
https://www.nginx.com/blog/smart-efficient-byte-range-caching-nginx/

Signed-off-by: Or Friedmann <ofriedma@redhat.com>

doc/radosgw/index.rst

@@ -49,6 +49,7 @@ you may write data with one API and retrieve it with the other.
Config Reference <config-ref>
Admin Guide <admin>
S3 API <s3>
Data caching and CDN <rgw-cache>
Swift API <swift>
Admin Ops API <adminops>
Python binding <api>

doc/radosgw/nginx-default.conf

@@ -0,0 +1,91 @@
#config cache size and path to the cache directory. Make sure that the user running Nginx has permission to access the cache directory
#max_size means that Nginx will not cache more than 20G; it should be tuned to a larger number if /data/cache is bigger
proxy_cache_path /data/cache levels=3:2 keys_zone=mycache:999m max_size=20G inactive=1d use_temp_path=off;
upstream rgws {
# List of all rgws (ips or resolvable names)
server rgw1:8000 max_fails=2 fail_timeout=5s;
server rgw2:8000 max_fails=2 fail_timeout=5s;
server rgw3:8000 max_fails=2 fail_timeout=5s;
}
server {
listen 80;
server_name cacher;
location /authentication {
internal;
limit_except GET { deny all; }
proxy_pass http://rgws$request_uri;
proxy_pass_request_body off;
proxy_set_header Host $host;
# setting x-rgw-auth allows RGW to only authorize the request, without fetching the object data
proxy_set_header x-rgw-auth "yes";
proxy_set_header Authorization $http_authorization;
proxy_http_version 1.1;
proxy_method $request_method;
# Do not convert HEAD requests into GET requests
proxy_cache_convert_head off;
error_page 404 = @outage;
proxy_intercept_errors on;
if ($request_uri = "/"){
return 200;
}
# URIs that include a question mark are not cached
if ($request_uri ~* (\?)){
return 200;
}
}
location @outage{
return 403;
}
location / {
limit_except GET { deny all; }
auth_request /authentication;
proxy_pass http://rgws;
set $authvar '';
# if $do_not_cache is not empty the request will not be cached; this is relevant for list operations, for example
set $do_not_cache '';
# Lua script that builds the X-Amz-Cache header from the signed request headers
rewrite_by_lua_file /etc/nginx/nginx-lua-file.lua;
#proxy_set_header Authorization $http_authorization;
# my cache configured at the top of the file
proxy_cache mycache;
proxy_cache_lock_timeout 0s;
proxy_cache_lock_age 1000s;
proxy_http_version 1.1;
set $date $aws_auth_date;
# The request gets a 403 if this header is not set
proxy_set_header Host $host;
# Cache 200 and 206 responses for 1 day
proxy_cache_valid 200 206 1d;
# Serve a stale cached response while it is being updated
proxy_cache_use_stale updating;
proxy_cache_background_update on;
# Revalidate using the ETag; if it has not changed, do not re-fetch the object from RGW
proxy_cache_revalidate on;
# Lock the cache so that only one request can populate it at a time
proxy_cache_lock on;
# Prevent conversion of HEAD requests into GET requests
proxy_cache_convert_head off;
# Listing all buckets should not be cached
if ($request_uri = "/"){
set $do_not_cache "no";
set $date $http_x_amz_date;
}
# URIs that include a question mark are not cached, to prevent caching of bucket listings
if ($request_uri ~* (\?)){
set $do_not_cache "no";
set $date $http_x_amz_date;
}
# Only AWSv4 requests are cached - for non-AWSv4 requests keep the original x-amz-date
if ($http_authorization !~* "aws4_request") {
set $date $http_x_amz_date;
}
# Use the original x-amz-date if the aws auth module didn't create one
proxy_set_header x-amz-date $date;
proxy_set_header X-Amz-Cache $authvar;
proxy_no_cache $do_not_cache;
proxy_set_header Authorization $awsauth;
# The content Nginx uses to build the cache key hash
proxy_cache_key "$request_uri$request_method$request_body";
client_max_body_size 20G;
}
}

doc/radosgw/nginx-lua-file.lua

@@ -0,0 +1,26 @@
local check = ngx.req.get_headers()["AUTHORIZATION"]
local uri = ngx.var.request_uri
local ngx_re = require "ngx.re"
local hdrs = ngx.req.get_headers()
-- Take all the SignedHeaders names; these are used to build the X-Amz-Cache header, which is needed to override the Range header so that an object can be read ahead
local res, err = ngx_re.split(check,"SignedHeaders=")
local res2, err2 = ngx_re.split(res[2],",")
local res3, err3 = ngx_re.split(res2[1],";")
local t = {}
local concathdrs = string.char(0x00)
for i = 1, #res3, 1 do
if hdrs[res3[i]] ~= nil then
--0xB1 is the separator between header name and value
t[i] = res3[i] .. string.char(0xB1) .. hdrs[res3[i]]
--0xB2 is the separator between headers
concathdrs = concathdrs .. string.char(0xB2) .. t[i]
end
end
-- check if the authorization header is not empty
if check ~= nil then
local xamzcache = concathdrs:sub(2)
xamzcache = xamzcache .. string.char(0xB2) .. "Authorization" .. string.char(0xB1) .. check
if xamzcache:find("aws4_request") ~= nil and uri ~= "/" and uri:find("?") == nil then
ngx.var.authvar = xamzcache
end
end

doc/radosgw/nginx-noprefetch.conf

@@ -0,0 +1,81 @@
#config cache size and path to the cache directory. Make sure that the user running Nginx has permission to access the cache directory
#max_size means that Nginx will not cache more than 20G; it should be tuned to a larger number if /data/cache is bigger
proxy_cache_path /data/cache levels=3:2 keys_zone=mycache:999m max_size=20G inactive=1d use_temp_path=off;
upstream rgws {
# List of all rgws (ips or resolvable names)
server rgw1:8000 max_fails=2 fail_timeout=5s;
server rgw2:8000 max_fails=2 fail_timeout=5s;
server rgw3:8000 max_fails=2 fail_timeout=5s;
}
server {
listen 80;
server_name cacher;
location /authentication {
internal;
limit_except GET { deny all; }
proxy_pass http://rgws$request_uri;
proxy_pass_request_body off;
proxy_set_header Host $host;
# setting x-rgw-auth allows RGW to only authorize the request, without fetching the object data
proxy_set_header x-rgw-auth "yes";
proxy_set_header Authorization $http_authorization;
proxy_http_version 1.1;
proxy_method $request_method;
# Do not convert HEAD requests into GET requests
proxy_cache_convert_head off;
error_page 404 = @outage;
proxy_intercept_errors on;
if ($request_uri = "/"){
return 200;
}
# URIs that include a question mark are not cached
if ($request_uri ~* (\?)){
return 200;
}
}
location @outage{
return 403;
}
location / {
limit_except GET { deny all; }
auth_request /authentication;
proxy_pass http://rgws;
# if $do_not_cache is not empty the request will not be cached; this is relevant for list operations, for example
set $do_not_cache '';
#proxy_set_header Authorization $http_authorization;
# my cache configured at the top of the file
proxy_cache mycache;
proxy_cache_lock_timeout 0s;
proxy_cache_lock_age 1000s;
proxy_http_version 1.1;
# The request gets a 403 if this header is not set
proxy_set_header Host $host;
# Cache 200 and 206 responses for 1 day
proxy_cache_valid 200 206 1d;
# Serve a stale cached response while it is being updated
proxy_cache_use_stale updating;
proxy_cache_background_update on;
# Revalidate using the ETag; if it has not changed, do not re-fetch the object from RGW
proxy_cache_revalidate on;
# Lock the cache so that only one request can populate it at a time
proxy_cache_lock on;
# Prevent conversion of HEAD requests into GET requests
proxy_cache_convert_head off;
# Listing all buckets should not be cached
if ($request_uri = "/"){
set $do_not_cache "no";
}
# URIs that include a question mark are not cached, to prevent caching of bucket listings
if ($request_uri ~* (\?)){
set $do_not_cache "no";
}
proxy_no_cache $do_not_cache;
proxy_set_header Authorization $http_authorization;
proxy_set_header Range $http_range;
# The content Nginx uses to build the cache key hash
proxy_cache_key "$request_uri$request_method$request_body$http_range";
client_max_body_size 20G;
}
}

doc/radosgw/nginx-slicing.conf

@@ -0,0 +1,93 @@
#config cache size and path to the cache directory. Make sure that the user running Nginx has permission to access the cache directory
#max_size means that Nginx will not cache more than 20G; it should be tuned to a larger number if /data/cache is bigger
proxy_cache_path /data/cache levels=3:2 keys_zone=mycache:999m max_size=20G inactive=1d use_temp_path=off;
upstream rgws {
# List of all rgws (ips or resolvable names)
server rgw1:8000 max_fails=2 fail_timeout=5s;
server rgw2:8000 max_fails=2 fail_timeout=5s;
server rgw3:8000 max_fails=2 fail_timeout=5s;
}
server {
listen 80;
server_name cacher;
location /authentication {
internal;
limit_except GET { deny all; }
proxy_pass http://rgws$request_uri;
proxy_pass_request_body off;
proxy_set_header Host $host;
# setting x-rgw-auth allows RGW to only authorize the request, without fetching the object data
proxy_set_header x-rgw-auth "yes";
proxy_set_header Authorization $http_authorization;
proxy_http_version 1.1;
proxy_method $request_method;
# Do not convert HEAD requests into GET requests
proxy_cache_convert_head off;
error_page 404 = @outage;
proxy_intercept_errors on;
if ($request_uri = "/"){
return 200;
}
# URIs that include a question mark are not cached
if ($request_uri ~* (\?)){
return 200;
}
}
location @outage{
return 403;
}
location / {
slice 1m;
limit_except GET { deny all; }
auth_request /authentication;
proxy_set_header Range $slice_range;
proxy_pass http://rgws;
set $authvar '';
# if $do_not_cache is not empty the request will not be cached; this is relevant for list operations, for example
set $do_not_cache '';
# Lua script that builds the X-Amz-Cache header from the signed request headers
rewrite_by_lua_file /etc/nginx/nginx-lua-file.lua;
#proxy_set_header Authorization $http_authorization;
# my cache configured at the top of the file
proxy_cache mycache;
proxy_cache_lock_timeout 0s;
proxy_cache_lock_age 1000s;
proxy_http_version 1.1;
set $date $aws_auth_date;
# The request gets a 403 if this header is not set
proxy_set_header Host $host;
# Cache 200 and 206 responses for 1 day
proxy_cache_valid 200 206 1d;
# Serve a stale cached response while it is being updated
proxy_cache_use_stale updating;
proxy_cache_background_update on;
# Revalidate using the ETag; if it has not changed, do not re-fetch the object from RGW
proxy_cache_revalidate on;
# Lock the cache so that only one request can populate it at a time
proxy_cache_lock on;
# Prevent conversion of HEAD requests into GET requests
proxy_cache_convert_head off;
# Listing all buckets should not be cached
if ($request_uri = "/"){
set $do_not_cache "no";
set $date $http_x_amz_date;
}
# URIs that include a question mark are not cached, to prevent caching of bucket listings
if ($request_uri ~* (\?)){
set $do_not_cache "no";
set $date $http_x_amz_date;
}
# Only AWSv4 requests are cached - for non-AWSv4 requests keep the original x-amz-date
if ($http_authorization !~* "aws4_request") {
set $date $http_x_amz_date;
}
# Use the original x-amz-date if the aws auth module didn't create one
proxy_set_header x-amz-date $date;
proxy_set_header X-Amz-Cache $authvar;
proxy_no_cache $do_not_cache;
proxy_set_header Authorization $awsauth;
# The content Nginx uses to build the cache key hash
proxy_cache_key "$request_uri$request_method$request_body$slice_range";
client_max_body_size 20G;
}
}

doc/radosgw/nginx.conf

@@ -0,0 +1,40 @@
user nginx;
# One worker process per CPU core
worker_processes auto;
pid /var/run/nginx.pid;
events {
#Number of connections per worker
worker_connections 1024;
}
http {
lua_package_path '/usr/local/openresty/lualib/?.lua;;';
aws_auth $aws_token {
# access key and secret key of the cache
access_key cache;
secret_key cache;
service s3;
region us-east-1;
}
# This map is used to choose the original Authorization header if the aws_auth module refuses to create one
map $aws_token $awsauth {
default $http_authorization;
~. $aws_token; # Regular expression to match any value
}
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
error_log /var/log/nginx/error.log;
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nodelay on;
keepalive_timeout 65;
include /etc/nginx/conf.d/*.conf;
}

doc/radosgw/rgw-cache.rst

@@ -0,0 +1,98 @@
==========================
RGW Data caching and CDN
==========================
.. versionadded:: Octopus
.. contents::
This new feature adds to RGW the ability to securely cache objects and offload work from the cluster, using Nginx.
After an object is accessed for the first time it is stored in the Nginx cache directory.
On subsequent requests Nginx will not fetch the data from RGW or the cluster; it will only ask RGW to verify the user's permissions.
This feature is based on several Nginx modules: ngx_http_auth_request_module, https://github.com/kaltura/nginx-aws-auth-module, and Openresty for Lua capabilities.
Currently this feature works only for GET requests and caches only AWSv4-signed requests (S3 requests only).
The feature introduces 2 new APIs: Auth and Cache.
New APIs
-------------------------
There are 2 new APIs for this feature:
Auth API - Nginx uses it to validate that a user may access the cached data.
Cache API - Adds the ability to securely override the Range header, so that Nginx can use its own smart cache on top of S3:
https://www.nginx.com/blog/smart-efficient-byte-range-caching-nginx/
Using this API makes it possible to read ahead in an object when a client asks for a specific range of it.
The next time the client asks for another range, Nginx will already have it and can serve that range from the cache.
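For illustration, a possible request flow with slicing enabled (a sketch assuming the 1m slice size from nginx-slicing.conf and a hypothetical object ``mybucket/obj``):
::
Client -> Nginx : GET /mybucket/obj  Range: bytes=0-4095
Nginx  -> RGW   : GET /mybucket/obj  Range: bytes=0-1048575   (slice range, signed by the cache user)
Nginx  -> Client: bytes 0-4095; the whole 1m slice is now cached
Client -> Nginx : GET /mybucket/obj  Range: bytes=4096-8191
Nginx  -> Client: served from the cached slice, no data fetched from RGW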
Auth API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This API is meant to let RGW only check whether a user is authorized to access an object, without returning its data.
To use it the client should add the ``x-rgw-auth`` header to its request; RGW will then return 200 or 206 if the request is successfully authorized.
If the client does not have permission, RGW will return 403.
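For illustration, this is roughly the request that the internal ``/authentication`` location in the sample Nginx configurations forwards to RGW (hostnames and the signature below are placeholders):
::
GET /mybucket/obj HTTP/1.1
Host: cacher
x-rgw-auth: yes
Authorization: AWS4-HMAC-SHA256 Credential=.../s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date, Signature=...
RGW authorizes the request and answers 200/206 without the object body, or 403 if the user is not allowed.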
Cache API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This API is meant to allow changing signed Range headers using a privileged user, the cache user.
Creating the cache user
::
$ radosgw-admin user create --uid=<uid for the cache user> --display-name="cache user" --caps="amz-cache=read"
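For example, to create a hypothetical cache user named ``nginx-cache``:
::
$ radosgw-admin user create --uid=nginx-cache --display-name="cache user" --caps="amz-cache=read"
The access key and secret key generated for this user are the credentials to put in the ``aws_auth`` block of nginx.conf.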
This user can send the Cache API header ``X-Amz-Cache`` to RGW. This header contains the headers of the original request (before the Range header was changed).
That is, ``X-Amz-Cache`` is built from several headers.
Within the ``X-Amz-Cache`` value, headers are separated by the character with ASCII code 178, and each header name is separated from its value by the character with ASCII code 177.
RGW checks that the requester is an authorized user and that it is a cache user;
if so, it uses the headers carried in ``X-Amz-Cache`` to revalidate that the original user has permission, using those original headers.
During this flow RGW overrides the Range header.
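For illustration, assuming the original request signed only the ``host`` and ``x-amz-date`` headers (hypothetical values below), the ``X-Amz-Cache`` value assembled by nginx-lua-file.lua would look roughly like this, with <177> and <178> standing for the separator bytes:
::
host<177>cacher<178>x-amz-date<177>20200203T103610Z<178>Authorization<177>AWS4-HMAC-SHA256 Credential=.../aws4_request, SignedHeaders=host;x-amz-date, Signature=...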
Using Nginx with RGW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Download the source of Openresty:
::
$ wget https://openresty.org/download/openresty-1.15.8.3.tar.gz
git clone the aws auth nginx module:
::
$ git clone https://github.com/kaltura/nginx-aws-auth-module
untar the openresty package:
::
$ tar xvzf openresty-1.15.8.3.tar.gz
$ cd openresty-1.15.8.3
Compile Openresty. Make sure that you have the pcre and openssl development libraries installed:
::
$ sudo yum install pcre-devel openssl-devel gcc curl zlib-devel nginx
$ ./configure --add-module=<the nginx-aws-auth-module dir> --with-http_auth_request_module --with-http_slice_module
$ gmake -j $(nproc)
$ sudo gmake install
$ sudo ln -sf /usr/local/openresty/bin/openresty /usr/bin/nginx
Put the Nginx configuration files in place and edit them according to your environment:
nginx.conf should go to /etc/nginx/nginx.conf
nginx-lua-file.lua should go to /etc/nginx/nginx-lua-file.lua
nginx-default.conf should go to /etc/nginx/conf.d/nginx-default.conf
It is possible to use Nginx slicing, which is a better method for streaming purposes.
To use slicing, use nginx-slicing.conf instead of nginx-default.conf.
Further information about Nginx slicing:
https://docs.nginx.com/nginx/admin-guide/content-cache/content-caching/#byte-range-caching
If you do not want to use prefetch caching, you can replace nginx-default.conf with nginx-noprefetch.conf.
Using noprefetch means that if a client sends a range request for 0-4095 and then for 0-4096, Nginx will cache those requests separately, so it will need to fetch them twice.
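Once the configuration files are in place, a quick way to validate the configuration and start the server (assuming the openresty binary was symlinked to /usr/bin/nginx as above):
::
$ sudo nginx -t
$ sudo nginx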

src/rgw/rgw_common.cc

@@ -1841,7 +1841,8 @@ bool RGWUserCaps::is_valid_cap_type(const string& tp)
"mdlog",
"datalog",
"roles",
"user-policy"};
"user-policy",
"amz-cache"};
for (unsigned int i = 0; i < sizeof(cap_type) / sizeof(char *); ++i) {
if (tp.compare(cap_type[i]) == 0) {

src/rgw/rgw_op.cc

@@ -2143,7 +2143,7 @@ int RGWGetObj::get_data_cb(bufferlist& bl, off_t bl_ofs, off_t bl_len)
bool RGWGetObj::prefetch_data()
{
/* HEAD request, stop prefetch*/
if (!get_data) {
if (!get_data || s->info.env->exists("HTTP_X_RGW_AUTH")) {
return false;
}
@@ -2245,7 +2245,10 @@ void RGWGetObj::execute()
if (get_type() == RGW_OP_STAT_OBJ) {
return;
}
if (s->info.env->exists("HTTP_X_RGW_AUTH")) {
op_ret = 0;
goto done_err;
}
/* start gettorrent */
if (torrent.get_flag())
{

src/rgw/rgw_rest_s3.cc

@@ -418,6 +418,56 @@ int RGWGetObj_ObjStore_S3::get_decrypt_filter(std::unique_ptr<RGWGetObj_Filter>
}
return res;
}
int RGWGetObj_ObjStore_S3::verify_requester(const rgw::auth::StrategyRegistry& auth_registry)
{
int ret = -EINVAL;
ret = RGWOp::verify_requester(auth_registry);
if(!s->user->get_caps().check_cap("amz-cache", RGW_CAP_READ) && !ret && s->info.env->exists("HTTP_X_AMZ_CACHE"))
ret = override_range_hdr(auth_registry);
return ret;
}
int RGWGetObj_ObjStore_S3::override_range_hdr(const rgw::auth::StrategyRegistry& auth_registry)
{
int ret = -EINVAL;
ldpp_dout(this,2) << "cache override headers" << dendl;
try {
RGWEnv* rgw_env = const_cast<RGWEnv *>(s->info.env);
// Maybe we need to change env inside req_info to non const var, maybe we can use const_cast or maybe keep it that way (changing rgw_env from the ref)
const char *backup_range = rgw_env->get("HTTP_RANGE");
const char hdrs_split[2] = {(char)178,'\0'};
const char kv_split[2] = {(char)177,'\0'};
std::string cache_hdr(rgw_env->get("HTTP_X_AMZ_CACHE")); // we are calling process_cache_request method only if the header exists
vector<string> cache_hdrs, cache_kvs;
boost::split(cache_hdrs,cache_hdr,boost::is_any_of(hdrs_split));
cache_hdrs.erase(cache_hdrs.begin());
ldpp_dout(this,2) << "starting parse cache headers" << dendl;
for (auto i = 0; i < cache_hdrs.size(); ++i)
{
boost::split(cache_kvs,cache_hdrs[i],boost::is_any_of(kv_split));
ldpp_dout(this,2) << "after splitting cache kv " << dendl;
if(cache_kvs.size() == 2) {
boost::replace_all(cache_kvs[0],"-","_");
rgw_env->set("HTTP_" + cache_kvs[0],cache_kvs[1]);
ldpp_dout(this,2) << "after splitting cache kv key: " << "HTTP_" + cache_kvs[0] << rgw_env->get((std::string("HTTP_") + cache_kvs[0]).c_str()) << dendl;
} else {
return -EINVAL;
}
}
ret = RGWOp::verify_requester(auth_registry);
if(!ret && backup_range) {
rgw_env->set("HTTP_RANGE",backup_range);
} else {
rgw_env->remove("HTTP_RANGE");
}
}
catch(const ceph::crypto::DigestException& e) {
dout(0) << "cache authentication failed" << e.what() << dendl;
abort_early(s, this, -EINVAL, dialect_handler);
}
return ret;
}
void RGWGetObjTags_ObjStore_S3::send_response_data(bufferlist& bl)
{

src/rgw/rgw_rest_s3.h

@@ -46,6 +46,8 @@ public:
RGWGetObj_ObjStore_S3() {}
~RGWGetObj_ObjStore_S3() override {}
int verify_requester(const rgw::auth::StrategyRegistry& auth_registry) override;
int override_range_hdr(const rgw::auth::StrategyRegistry& auth_registry);
int get_params() override;
int send_response_data_error() override;
int send_response_data(bufferlist& bl, off_t ofs, off_t len) override;