Add options for tweaking indexing speed and enabling synchronous mode

Dmytro Meleshko 2021-11-07 18:41:16 +02:00
parent e26cdfb26f
commit 3143b0fb9f
3 changed files with 109 additions and 24 deletions


@@ -109,6 +109,58 @@ end
 ```
+
+### indexing_interval (type: number)
+
+_Default:_ `200`
+
+The interval (in milliseconds) at which buffers are scanned for words when they are first opened.
+Setting this interval to lower values will speed up indexing, at the expense of higher CPU usage.
+By default indexing happens asynchronously, but setting this option to zero or a negative value
+switches indexing to a synchronous algorithm, which uses significantly less RAM on big files and
+takes less time in total (to index the entire file), with the obvious downside of blocking the
+user interface for a second or two. On small files (up to tens of thousands of lines, probably)
+the difference will be unnoticeable, though.
+
+### indexing_chunk_size (type: number)
+
+_Default:_ `1000`
+
+The number of lines processed per batch every `indexing_interval` milliseconds. Setting it to
+higher values will make indexing faster, but at the cost of UI responsiveness. When using the
+synchronous mode, changing this option may improve memory usage, though the default value has
+been tested to be pretty good in this regard.
+
+Please note that `indexing_interval` and `indexing_chunk_size` are advanced options: change them
+only if you experience performance or RAM usage problems (or need to work on particularly large
+files), and be sure to measure the results!
+
+## Performance on large text files
+
+This source has been tested on code files a few megabytes in size (5-10 MB) and has been
+optimized for them; however, the indexed words can still take up tens of megabytes of RAM if the
+file is big (on small files it _will not be more_ than a couple of megabytes, typically much
+less). So if you wish to avoid accidentally wasting lots of RAM when editing big files, you can
+tweak `get_bufnrs`, for example like this:
+
+```lua
+get_bufnrs = function()
+  local buf = vim.api.nvim_get_current_buf()
+  local byte_size = vim.api.nvim_buf_get_offset(buf, vim.api.nvim_buf_line_count(buf))
+  if byte_size > 1024 * 1024 then -- 1 Megabyte max
+    return {}
+  end
+  return { buf }
+end
+```
+
+Of course, this snippet can be combined with any other recipes for `get_bufnrs`.
+
+As another tip, turning on the synchronous indexing mode is very likely to help reduce memory
+usage; see the `indexing_interval` option.
+
 ## Locality bonus comparator (distance-based sorting)
 This source also provides a comparator function which uses information from the word indexer
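The options documented above are passed to the source through nvim-cmp's per-source `option` table. A minimal sketch of such a configuration (the surrounding `cmp.setup` shape and the example values are illustrative, not part of this commit):

```lua
-- Illustrative values only; tune them per the guidance in the README text above.
require('cmp').setup({
  sources = {
    {
      name = 'buffer',
      option = {
        indexing_interval = 100, -- scan buffers more often (higher CPU usage)
        indexing_chunk_size = 2000, -- index more lines per batch
        -- indexing_interval = 0, -- or: switch to the synchronous (blocking) algorithm
      },
    },
  },
})
```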
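The README also notes that the size guard can be combined with other `get_bufnrs` recipes. One possible combination with an "all loaded buffers" recipe (an assumption for illustration, not shown in this commit) could look like:

```lua
get_bufnrs = function()
  local bufs = {}
  for _, buf in ipairs(vim.api.nvim_list_bufs()) do
    -- Only index buffers that are loaded and smaller than 1 MiB.
    if vim.api.nvim_buf_is_loaded(buf) then
      local byte_size = vim.api.nvim_buf_get_offset(buf, vim.api.nvim_buf_line_count(buf))
      if byte_size <= 1024 * 1024 then
        bufs[#bufs + 1] = buf
      end
    end
  end
  return bufs
end
```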


@@ -2,8 +2,6 @@
 ---@field public bufnr number
 ---@field public opts cmp_buffer.Options
 ---@field public regex any
----@field public indexing_chunk_size number
----@field public indexing_interval number
 ---@field public timer any|nil
 ---@field public lines_count number
 ---@field public lines_words table<number, string[]>
@@ -34,8 +32,6 @@ function buffer.new(bufnr, opts)
   self.opts = opts
   self.regex = vim.regex(self.opts.keyword_pattern)
-  self.indexing_chunk_size = 1000
-  self.indexing_interval = 200
   self.lines_count = 0
   self.lines_words = {}
@@ -96,11 +92,18 @@ end
 ---Indexing buffer
 function buffer.index(self)
   self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
-  for i = 1, self.lines_count do
-    self.lines_words[i] = {}
-  end
-  self:index_range_async(0, self.lines_count)
+  -- NOTE: Pre-allocating self.lines_words here somehow wastes more memory, and
+  -- not doing that doesn't have a visible effect on performance. Win-win.
+  -- for i = 1, self.lines_count do
+  --   self.lines_words[i] = {}
+  -- end
+  if self.opts.indexing_interval <= 0 then
+    self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
+    self:mark_all_lines_dirty()
+  else
+    self:index_range_async(0, self.lines_count, self.opts.indexing_chunk_size)
+  end
 end

 --- Workaround for https://github.com/neovim/neovim/issues/16729
@@ -112,30 +115,52 @@ function buffer.safe_buf_call(self, callback)
   end
 end

-function buffer.index_range(self, range_start, range_end)
+--- sync algorithm
+function buffer.index_range(self, range_start, range_end, chunk_size)
   self:safe_buf_call(function()
-    local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
-    for i, line in ipairs(lines) do
-      self:index_line(range_start + i, line)
+    if chunk_size < 1 then
+      chunk_size = range_end - range_start
+    end
+    local chunk_start = range_start
+    while chunk_start < range_end do
+      local chunk_end = math.min(chunk_start + chunk_size, range_end)
+      -- For some reason requesting line arrays multiple times in chunks leads
+      -- to much better memory usage than doing that in one big array, which is
+      -- why the sync algorithm has better memory usage than the async one.
+      local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
+      for i, line in ipairs(chunk_lines) do
+        self:index_line(chunk_start + i, line)
+      end
+      chunk_start = chunk_end
     end
   end)
 end

-function buffer.index_range_async(self, range_start, range_end)
+--- async algorithm
+function buffer.index_range_async(self, range_start, range_end, chunk_size)
+  if chunk_size < 1 then
+    chunk_size = range_end - range_start
+  end
   local chunk_start = range_start
   local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
+  -- This flag prevents vim.schedule() callbacks from piling up in the queue
+  -- when the indexing interval is very short.
+  local scheduled = false
   self.timer = vim.loop.new_timer()
-  self.timer:start(
-    0,
-    self.indexing_interval,
-    vim.schedule_wrap(function()
+  self.timer:start(0, self.opts.indexing_interval, function()
+    if scheduled then
+      return
+    end
+    scheduled = true
+    vim.schedule(function()
+      scheduled = false
       if self.closed then
         return
       end
-      local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end)
+      local chunk_end = math.min(chunk_start + chunk_size, range_end)
       self:safe_buf_call(function()
         for linenr = chunk_start + 1, chunk_end do
           self:index_line(linenr, lines[linenr])
@@ -149,7 +174,7 @@ function buffer.index_range_async(self, range_start, range_end)
         self:stop_indexing_timer()
       end
     end)
-  )
+  end)
 end

 --- watch
@@ -219,7 +244,7 @@ function buffer.watch(self)
       self.lines_count = new_lines_count

       -- replace lines
-      self:index_range(first_line, new_last_line)
+      self:index_range(first_line, new_last_line, self.opts.indexing_chunk_size)

       if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then
         self.unique_words_curr_line_dirty = true
@@ -242,9 +267,11 @@ function buffer.watch(self)
       -- because tables of all lines can be assumed to be fresh.
       local new_lines_count = vim.api.nvim_buf_line_count(self.bufnr)
       if new_lines_count > self.lines_count then -- append
-        for i = self.lines_count + 1, new_lines_count do
-          self.lines_words[i] = {}
-        end
+        -- Again, no need to pre-allocate, index_line will append new lines
+        -- itself.
+        -- for i = self.lines_count + 1, new_lines_count do
+        --   self.lines_words[i] = {}
+        -- end
       elseif new_lines_count < self.lines_count then -- remove
         for i = self.lines_count, new_lines_count + 1, -1 do
           self.lines_words[i] = nil
@@ -252,7 +279,7 @@ function buffer.watch(self)
       end
       self.lines_count = new_lines_count
-      self:index_range(0, self.lines_count)
+      self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
       self:mark_all_lines_dirty()
       self.words_distances_dirty = true
     end,
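The `scheduled` flag added to `index_range_async` above exists because the libuv timer callback runs outside the API-safe context and has to defer its work through `vim.schedule()`; without the flag, a very short `indexing_interval` could queue more callbacks than the editor can drain. A stripped-down sketch of the same pattern, standalone and with illustrative names (not code from this commit):

```lua
-- Repeating libuv timer that defers API-touching work with vim.schedule(),
-- guarded by a flag so callbacks cannot pile up when the interval is short.
local timer = vim.loop.new_timer()
local scheduled = false
timer:start(0, 200, function()
  if scheduled then
    return -- the previous chunk has not been processed yet
  end
  scheduled = true
  vim.schedule(function()
    scheduled = false
    -- process one chunk of work here (it is safe to call vim.api.* now);
    -- once all chunks are done, stop and close the timer:
    -- timer:stop(); timer:close()
  end)
end)
```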


@@ -4,6 +4,8 @@ local buffer = require('cmp_buffer.buffer')
 ---@field public keyword_length number
 ---@field public keyword_pattern string
 ---@field public get_bufnrs fun(): number[]
+---@field public indexing_chunk_size number
+---@field public indexing_interval number

 ---@type cmp_buffer.Options
 local defaults = {
@@ -12,6 +14,8 @@ local defaults = {
   get_bufnrs = function()
     return { vim.api.nvim_get_current_buf() }
   end,
+  indexing_chunk_size = 1000,
+  indexing_interval = 200,
 }

 local source = {}
@@ -29,6 +33,8 @@ source._validate_options = function(_, params)
     keyword_length = { opts.keyword_length, 'number' },
     keyword_pattern = { opts.keyword_pattern, 'string' },
     get_bufnrs = { opts.get_bufnrs, 'function' },
+    indexing_chunk_size = { opts.indexing_chunk_size, 'number' },
+    indexing_interval = { opts.indexing_interval, 'number' },
   })
   return opts
 end
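With the new entries in `_validate_options`, a misconfigured option is rejected by `vim.validate`, which raises an error naming the offending field. A quick sketch of that behaviour with sample values (not taken from this commit):

```lua
-- vim.validate raises when a value does not match the declared type,
-- e.g. passing a string where a number is expected.
local opts = { indexing_chunk_size = '1000', indexing_interval = 200 }
local ok, err = pcall(vim.validate, {
  indexing_chunk_size = { opts.indexing_chunk_size, 'number' },
  indexing_interval = { opts.indexing_interval, 'number' },
})
print(ok, err) -- false, plus an error message naming the field and the expected type
```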