ceph/examples/rgw-cache/nginx-lua-file.lua

-- Builds the X-Amz-Cache value for the current request and stores it in the
-- nginx variable `authvar`.
--
-- The value is made of one record per header that is both listed in the
-- "SignedHeaders=" part of the Authorization header and present in the
-- request, followed by a record for the Authorization header itself:
--
--   0xB2 <name> 0xB1 <value>   (repeated)
--   0xB2 "Authorization" 0xB1 <authorization header value>
--
-- `authvar` is left untouched when the Authorization header is missing, when
-- its signed-header list cannot be parsed, or when the request is not
-- eligible (see the final condition).
local check = ngx.req.get_headers()["AUTHORIZATION"]
local uri = ngx.var.request_uri
local ngx_re = require "ngx.re"
local hdrs = ngx.req.get_headers()

-- 0xB1 is the separator between header name and value
local NAME_VALUE_SEP = string.char(0xB1)
-- 0xB2 is the separator between headers
local HEADER_SEP = string.char(0xB2)

-- Returns the list of header names found after "SignedHeaders=" in
-- `authorization` (names are ';'-separated, the list ends at the first ','),
-- or nil when the list is absent or a split fails.
local function signed_header_names(authorization)
    local around_key = ngx_re.split(authorization, "SignedHeaders=")
    if not around_key or around_key[2] == nil then
        return nil
    end
    local fields = ngx_re.split(around_key[2], ",")
    if not fields or fields[1] == nil then
        return nil
    end
    return (ngx_re.split(fields[1], ";"))
end

-- Returns the X-Amz-Cache string for `authorization`, or nil when the signed
-- header names cannot be extracted from it.
local function build_xamzcache(authorization)
    local names = signed_header_names(authorization)
    if names == nil then
        return nil
    end

    local parts = {}
    for i = 1, #names do
        local name = names[i]
        local value = hdrs[name]
        if value ~= nil then
            -- A header sent more than once is returned as a table of values.
            -- NOTE(review): values are joined with ","; confirm this is the
            -- form the consumer of X-Amz-Cache expects.
            if type(value) == "table" then
                value = table.concat(value, ",")
            end
            parts[#parts + 1] = HEADER_SEP .. name .. NAME_VALUE_SEP .. value
        end
    end
    parts[#parts + 1] = HEADER_SEP .. "Authorization" .. NAME_VALUE_SEP .. authorization

    return table.concat(parts)
end

-- Only a single, non-empty Authorization header can be processed; this check
-- must happen before the header is parsed.
if type(check) == "string" then
    local xamzcache = build_xamzcache(check)
    -- Plain-text searches: "?" is a magic character in Lua patterns.
    if xamzcache ~= nil
        and xamzcache:find("aws4_request", 1, true) ~= nil
        and uri ~= "/"
        and uri:find("?", 1, true) == nil
        and hdrs["if-match"] == nil then
        ngx.var.authvar = xamzcache
    end
end
rgw: Adding data cache and CDN capabilities This feature is meant to add data cache feature to the RGW. It is using Nginx as a cache server. This feature adds 2 new apis, Auth api and Cache api. Some Performance tests using hsbench: 16K objs: RGW direct access: Mode: GET, Ops: 3001, MB/s: 46.89, Lat(ms): [ min: 30.4, avg: 33.2, 99%: 34.7, max: 35.2 ] Nginx access (objs have not been cached) Mode: GET, Ops: 1363, MB/s: 21.30, Lat(ms): [ min: 63.8, avg: 73.8, 99%: 78.1, max: 86.6 ] Nginx access (objs have been cached) Mode: GET, Ops: 2446, MB/s: 38.22, Lat(ms): [ min: 36.9, avg: 41.0, 99%: 43.9, max: 45.9 ] 512K objs: RGW direct access: Mode: GET, Ops: 1492, MB/s: 746.00 Lat(ms): [ min: 60.4, avg: 66.7, 99%: 73.5, max: 75.9 ] Nginx access (objs have not been cached) Mode: GET, Ops: 1382, MB/s: 691.00, Lat(ms): [ min: 64.5, avg: 72.1, 99%: 77.9, max: 82.8 ] Nginx access (objs have been cached) Mode: GET, Ops: 2947, MB/s: 1473.50, Lat(ms): [ min: 3.3, avg: 32.7, 99%: 62.2, max: 72.1 ] 2M objs: RGW direct access: Mode: GET, Ops: 613, MB/s: 1226.00, Lat(ms): [ min: 143.6, avg: 162.0, 99%: 180.2, max: 190.1 ] Nginx access (objs have not been cached) Mode: GET, Ops: 462, MB/s: 924.00, Lat(ms): [ min: 180.2, avg: 215.0, 99%: 243.2, max: 248.3 ] Nginx access (objs have been cached) Mode: GET, Ops: 1392, MB/s: 2784.00, Lat(ms): [ min: 3.0, avg: 5.3, 99%: 18.8, max: 30.2 ] 10M objs: RGW direct access: Mode: GET, Ops: 135, MB/s: 1350.00, Lat(ms): [ min: 191.1, avg: 265.8, 99%: 373.1, max: 382.8 ] Nginx access (objs have not been cached) Mode: GET, Ops: 120, MB/s: 1200.00, Lat(ms): [ min: 302.1, avg: 428.8, 99%: 561.2, max: 583.7 ] Nginx access (objs have been cached) Mode: GET, Ops: 281, MB/s: 2810.00, Lat(ms): [ min: 3.2, avg: 8.3, 99%: 16.9, max: 25.6 ] gdal_translate 4GiB image gdal_translate -co NUM_THREADS=ALL_CPUS /vsis3/hello/sat.tif Nginx (have not cached): real 0m24.714s user 0m8.692s sys 0m10.360s Nginx (have been cached): real 0m21.070s user 0m9.140s sys 0m10.316s 
RGW: real 0m21.859s user 0m8.850s sys 0m10.386s The results show that for objects larger than 512K the cache increases performance by a factor of two or more. For small objects, the overhead of sending the auth request makes the cache less efficient. The result for cached objects in the 10MB test can be explained by the network limit of 25 Gb/s (it could reach more). With GDAL (an image decoder/encoder working over S3 using range requests) the results were not that different because of GDAL's single-CPU encoding/decoding. GDAL was chosen because of its ability to check the smart cache of nginx. https://www.nginx.com/blog/smart-efficient-byte-range-caching-nginx/ Signed-off-by: Or Friedmann <ofriedma@redhat.com> 2020-02-03 10:36:10 +00:00			`local check = ngx.req.get_headers()["AUTHORIZATION"]`
			`local uri = ngx.var.request_uri`
			`local ngx_re = require "ngx.re"`
			`local hdrs = ngx.req.get_headers()`
			`--Take all signedheaders names, this for creating the X-Amz-Cache which is necessary to override range header to be able to readahead an object`
			`local res, err = ngx_re.split(check,"SignedHeaders=")`
			`local res2, err2 = ngx_re.split(res[2],",")`
			`local res3, err3 = ngx_re.split(res2[1],";")`
			`local t = {}`
			`local concathdrs = string.char(0x00)`
			`for i = 1, #res3, 1 do`
			`if hdrs[res3[i]] ~= nil then`
			`--0xB1 is the separator between header name and value`
			`t[i] = res3[i] .. string.char(0xB1) .. hdrs[res3[i]]`
			`--0xB2 is the separator between headers`
			`concathdrs = concathdrs .. string.char(0xB2) .. t[i]`
			`end`
			`end`
			`-- check if the authorization header is not empty`
			`if check ~= nil then`
			`local xamzcache = concathdrs:sub(2)`
			`xamzcache = xamzcache .. string.char(0xB2) .. "Authorization" .. string.char(0xB1) .. check`
rgw: add PUT and POST req support to data cache facilitates the full usage of the Nginx cache endpoint with s3 tools that support AWSv4 like s3cmd,aws-cli, benchmarking tools like hsbench and also hadoop/s3a. Co-authored-by: Or Friedmann <ofriedma@redhat.com> Signed-off-by: Mark Kogan <mkogan@redhat.com> 2020-07-20 10:19:57 +00:00			`if xamzcache:find("aws4_request") ~= nil and uri ~= "/" and uri:find("?") == nil and hdrs["if-match"] == nil then`
rgw: Adding data cache and CDN capabilities This feature is meant to add data cache feature to the RGW. It is using Nginx as a cache server. This feature adds 2 new apis, Auth api and Cache api. Some Performance tests using hsbench: 16K objs: RGW direct access: Mode: GET, Ops: 3001, MB/s: 46.89, Lat(ms): [ min: 30.4, avg: 33.2, 99%: 34.7, max: 35.2 ] Nginx access (objs have not been cached) Mode: GET, Ops: 1363, MB/s: 21.30, Lat(ms): [ min: 63.8, avg: 73.8, 99%: 78.1, max: 86.6 ] Nginx access (objs have been cached) Mode: GET, Ops: 2446, MB/s: 38.22, Lat(ms): [ min: 36.9, avg: 41.0, 99%: 43.9, max: 45.9 ] 512K objs: RGW direct access: Mode: GET, Ops: 1492, MB/s: 746.00 Lat(ms): [ min: 60.4, avg: 66.7, 99%: 73.5, max: 75.9 ] Nginx access (objs have not been cached) Mode: GET, Ops: 1382, MB/s: 691.00, Lat(ms): [ min: 64.5, avg: 72.1, 99%: 77.9, max: 82.8 ] Nginx access (objs have been cached) Mode: GET, Ops: 2947, MB/s: 1473.50, Lat(ms): [ min: 3.3, avg: 32.7, 99%: 62.2, max: 72.1 ] 2M objs: RGW direct access: Mode: GET, Ops: 613, MB/s: 1226.00, Lat(ms): [ min: 143.6, avg: 162.0, 99%: 180.2, max: 190.1 ] Nginx access (objs have not been cached) Mode: GET, Ops: 462, MB/s: 924.00, Lat(ms): [ min: 180.2, avg: 215.0, 99%: 243.2, max: 248.3 ] Nginx access (objs have been cached) Mode: GET, Ops: 1392, MB/s: 2784.00, Lat(ms): [ min: 3.0, avg: 5.3, 99%: 18.8, max: 30.2 ] 10M objs: RGW direct access: Mode: GET, Ops: 135, MB/s: 1350.00, Lat(ms): [ min: 191.1, avg: 265.8, 99%: 373.1, max: 382.8 ] Nginx access (objs have not been cached) Mode: GET, Ops: 120, MB/s: 1200.00, Lat(ms): [ min: 302.1, avg: 428.8, 99%: 561.2, max: 583.7 ] Nginx access (objs have been cached) Mode: GET, Ops: 281, MB/s: 2810.00, Lat(ms): [ min: 3.2, avg: 8.3, 99%: 16.9, max: 25.6 ] gdal_translate 4GiB image gdal_translate -co NUM_THREADS=ALL_CPUS /vsis3/hello/sat.tif Nginx (have not cached): real 0m24.714s user 0m8.692s sys 0m10.360s Nginx (have been cached): real 0m21.070s user 0m9.140s sys 0m10.316s 
RGW: real 0m21.859s user 0m8.850s sys 0m10.386s The results show that for objects larger than 512K the cache increases performance by a factor of two or more. For small objects, the overhead of sending the auth request makes the cache less efficient. The result for cached objects in the 10MB test can be explained by the network limit of 25 Gb/s (it could reach more). With GDAL (an image decoder/encoder working over S3 using range requests) the results were not that different because of GDAL's single-CPU encoding/decoding. GDAL was chosen because of its ability to check the smart cache of nginx. https://www.nginx.com/blog/smart-efficient-byte-range-caching-nginx/ Signed-off-by: Or Friedmann <ofriedma@redhat.com> 2020-02-03 10:36:10 +00:00			`ngx.var.authvar = xamzcache`
			`end`
			`end`