Add options for tweaking indexing speed and enabling synchronous mode

This commit is contained in:
Dmytro Meleshko 2021-11-07 18:41:16 +02:00
parent e26cdfb26f
commit 3143b0fb9f
3 changed files with 109 additions and 24 deletions

View File

@ -109,6 +109,58 @@ end
```
### indexing_interval (type: number)
_Default:_ `200`
The interval (in milliseconds) between batches of lines scanned for words when buffers are first opened.
Setting this interval to lower values will increase the speed of indexing, but at the expense of
higher CPU usage. By default indexing happens asynchronously, but setting this option to zero or
a negative value will switch indexing to a synchronous algorithm, which uses significantly less
RAM on big files and takes less time in total (to index the entire file), with the obvious
downside of blocking the user interface for a second or two. On small files (up to tens of
thousands of lines, probably) the difference will be unnoticeable, though.
### indexing_chunk_size (type: number)
_Default:_ `1000`
The number of lines processed in one batch every `indexing_interval` milliseconds. Setting it to
higher values will make indexing faster, but at the cost of responsiveness of the UI. When using
the synchronous mode, changing this option may improve memory usage, though the default value has
been tested to be pretty good in this regard.
Please note that `indexing_interval` and `indexing_chunk_size` are advanced options: change
them only if you experience performance or RAM usage problems (or need to work on particularly
large files), and be sure to measure the results!
## Performance on large text files
This source has been tested on code files of a few megabytes in size (5-10) and has been
optimized for them. However, the indexed words can still take up tens of megabytes of RAM if the
file is big (on small files it _will not be more_ than a couple of megabytes, typically much
less). So if you wish to avoid accidentally wasting lots of RAM when editing big files, you can
tweak `get_bufnrs`, for example like this:
```lua
get_bufnrs = function()
  -- Only offer the current buffer for indexing when it is at most 1 Megabyte.
  local max_bytes = 1024 * 1024
  local bufnr = vim.api.nvim_get_current_buf()
  local line_count = vim.api.nvim_buf_line_count(bufnr)
  -- The offset of the index just past the last line is used as the byte size
  -- of the whole buffer.
  local total_bytes = vim.api.nvim_buf_get_offset(bufnr, line_count)
  if total_bytes <= max_bytes then
    return { bufnr }
  end
  -- Too big: return no buffers so nothing gets indexed.
  return {}
end
Of course, this snippet can be combined with any other recipes for `get_bufnrs`.
As another tip, turning on the synchronous indexing mode is very likely to help with reducing
memory usage; see the `indexing_interval` option.
## Locality bonus comparator (distance-based sorting)
This source also provides a comparator function which uses information from the word indexer

View File

@ -2,8 +2,6 @@
---@field public bufnr number
---@field public opts cmp_buffer.Options
---@field public regex any
---@field public indexing_chunk_size number
---@field public indexing_interval number
---@field public timer any|nil
---@field public lines_count number
---@field public lines_words table<number, string[]>
@ -34,8 +32,6 @@ function buffer.new(bufnr, opts)
self.opts = opts
self.regex = vim.regex(self.opts.keyword_pattern)
self.indexing_chunk_size = 1000
self.indexing_interval = 200
self.lines_count = 0
self.lines_words = {}
@ -96,11 +92,18 @@ end
---Indexing buffer
function buffer.index(self)
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
for i = 1, self.lines_count do
self.lines_words[i] = {}
end
-- NOTE: Pre-allocating self.lines_words here somehow wastes more memory, and
-- not doing that doesn't have a visible effect on performance. Win-win.
-- for i = 1, self.lines_count do
-- self.lines_words[i] = {}
-- end
self:index_range_async(0, self.lines_count)
if self.opts.indexing_interval <= 0 then
self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
self:mark_all_lines_dirty()
else
self:index_range_async(0, self.lines_count, self.opts.indexing_chunk_size)
end
end
--- Workaround for https://github.com/neovim/neovim/issues/16729
@ -112,30 +115,52 @@ function buffer.safe_buf_call(self, callback)
end
end
function buffer.index_range(self, range_start, range_end)
--- sync algorithm
function buffer.index_range(self, range_start, range_end, chunk_size)
self:safe_buf_call(function()
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
for i, line in ipairs(lines) do
self:index_line(range_start + i, line)
if chunk_size < 1 then
chunk_size = range_end - range_start
end
local chunk_start = range_start
while chunk_start < range_end do
local chunk_end = math.min(chunk_start + chunk_size, range_end)
-- For some reason requesting line arrays multiple times in chunks leads
-- to much better memory usage than doing that in one big array, which is
-- why the sync algorithm has better memory usage than the async one.
local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
for i, line in ipairs(chunk_lines) do
self:index_line(chunk_start + i, line)
end
chunk_start = chunk_end
end
end)
end
function buffer.index_range_async(self, range_start, range_end)
--- async algorithm
function buffer.index_range_async(self, range_start, range_end, chunk_size)
if chunk_size < 1 then
chunk_size = range_end - range_start
end
local chunk_start = range_start
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
-- This flag prevents vim.schedule() callbacks from piling up in the queue
-- when the indexing interval is very short.
local scheduled = false
self.timer = vim.loop.new_timer()
self.timer:start(
0,
self.indexing_interval,
vim.schedule_wrap(function()
self.timer:start(0, self.opts.indexing_interval, function()
if scheduled then
return
end
scheduled = true
vim.schedule(function()
scheduled = false
if self.closed then
return
end
local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end)
local chunk_end = math.min(chunk_start + chunk_size, range_end)
self:safe_buf_call(function()
for linenr = chunk_start + 1, chunk_end do
self:index_line(linenr, lines[linenr])
@ -149,7 +174,7 @@ function buffer.index_range_async(self, range_start, range_end)
self:stop_indexing_timer()
end
end)
)
end)
end
--- watch
@ -219,7 +244,7 @@ function buffer.watch(self)
self.lines_count = new_lines_count
-- replace lines
self:index_range(first_line, new_last_line)
self:index_range(first_line, new_last_line, self.opts.indexing_chunk_size)
if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then
self.unique_words_curr_line_dirty = true
@ -242,9 +267,11 @@ function buffer.watch(self)
-- because tables of all lines can be assumed to be fresh.
local new_lines_count = vim.api.nvim_buf_line_count(self.bufnr)
if new_lines_count > self.lines_count then -- append
for i = self.lines_count + 1, new_lines_count do
self.lines_words[i] = {}
end
-- Again, no need to pre-allocate, index_line will append new lines
-- itself.
-- for i = self.lines_count + 1, new_lines_count do
-- self.lines_words[i] = {}
-- end
elseif new_lines_count < self.lines_count then -- remove
for i = self.lines_count, new_lines_count + 1, -1 do
self.lines_words[i] = nil
@ -252,7 +279,7 @@ function buffer.watch(self)
end
self.lines_count = new_lines_count
self:index_range(0, self.lines_count)
self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
self:mark_all_lines_dirty()
self.words_distances_dirty = true
end,

View File

@ -4,6 +4,8 @@ local buffer = require('cmp_buffer.buffer')
---@field public keyword_length number
---@field public keyword_pattern string
---@field public get_bufnrs fun(): number[]
---@field public indexing_chunk_size number
---@field public indexing_interval number
---@type cmp_buffer.Options
local defaults = {
@ -12,6 +14,8 @@ local defaults = {
get_bufnrs = function()
return { vim.api.nvim_get_current_buf() }
end,
indexing_chunk_size = 1000,
indexing_interval = 200,
}
local source = {}
@ -29,6 +33,8 @@ source._validate_options = function(_, params)
keyword_length = { opts.keyword_length, 'number' },
keyword_pattern = { opts.keyword_pattern, 'string' },
get_bufnrs = { opts.get_bufnrs, 'function' },
indexing_chunk_size = { opts.indexing_chunk_size, 'number' },
indexing_interval = { opts.indexing_interval, 'number' },
})
return opts
end