mirror of
https://github.com/hrsh7th/cmp-buffer
synced 2025-04-06 17:43:10 +00:00
Add options for tweaking indexing speed and enabling synchronous mode
This commit is contained in:
parent
e26cdfb26f
commit
3143b0fb9f
52
README.md
52
README.md
@ -109,6 +109,58 @@ end
|
||||
```
|
||||
|
||||
|
||||
### indexing_interval (type: number)
|
||||
|
||||
_Default:_ `200`
|
||||
|
||||
The interval (in milliseconds) between indexing batches when buffers are scanned for words after they are first opened.
|
||||
Setting this interval to lower values will increase the speed of indexing, but at the expense of
|
||||
higher CPU usage. By default indexing happens asynchronously, but setting this option to zero or
|
||||
a negative value will switch indexing to a synchronous algorithm, which uses significantly less
|
||||
RAM on big files and takes less time in total (to index the entire file), with the obvious
|
||||
downside of blocking the user interface for a second or two. On small files (up to tens of
|
||||
thousands of lines, probably) the difference will be unnoticeable, though.
|
||||
|
||||
|
||||
### indexing_chunk_size (type: number)
|
||||
|
||||
_Default:_ `1000`
|
||||
|
||||
The number of lines processed in a batch every `indexing_interval` milliseconds. Setting it to
|
||||
higher values will make indexing faster, but at the cost of responsiveness of the UI. When using
|
||||
the synchronous mode, changing this option may improve memory usage, though the default value has
|
||||
been tested to be pretty good in this regard.
|
||||
|
||||
Please note that `indexing_interval` and `indexing_chunk_size` are advanced options; change
|
||||
them only if you experience performance or RAM usage problems (or need to work on particularly
|
||||
large files) and be sure to measure the results!
|
||||
|
||||
|
||||
## Performance on large text files
|
||||
|
||||
This source has been tested on code files of a few megabytes in size (5-10) and it has been
|
||||
optimized for them; however, the indexed words can still take up tens of megabytes of RAM if the
|
||||
file is big (on small files it _will not be more_ than a couple of megabytes, typically much
|
||||
less). So if you wish to avoid accidentally wasting lots of RAM when editing big files, you can
|
||||
tweak `get_bufnrs`, for example like this:
|
||||
|
||||
```lua
|
||||
get_bufnrs = function()
|
||||
local buf = vim.api.nvim_get_current_buf()
|
||||
local byte_size = vim.api.nvim_buf_get_offset(buf, vim.api.nvim_buf_line_count(buf))
|
||||
if byte_size > 1024 * 1024 then -- 1 Megabyte max
|
||||
return {}
|
||||
end
|
||||
return { buf }
|
||||
end
|
||||
```
|
||||
|
||||
Of course, this snippet can be combined with any other recipes for `get_bufnrs`.
|
||||
|
||||
As another tip, turning on the synchronous indexing mode is very likely to help with reducing
|
||||
memory usage; see the `indexing_interval` option.
|
||||
|
||||
|
||||
## Locality bonus comparator (distance-based sorting)
|
||||
|
||||
This source also provides a comparator function which uses information from the word indexer
|
||||
|
@ -2,8 +2,6 @@
|
||||
---@field public bufnr number
|
||||
---@field public opts cmp_buffer.Options
|
||||
---@field public regex any
|
||||
---@field public indexing_chunk_size number
|
||||
---@field public indexing_interval number
|
||||
---@field public timer any|nil
|
||||
---@field public lines_count number
|
||||
---@field public lines_words table<number, string[]>
|
||||
@ -34,8 +32,6 @@ function buffer.new(bufnr, opts)
|
||||
|
||||
self.opts = opts
|
||||
self.regex = vim.regex(self.opts.keyword_pattern)
|
||||
self.indexing_chunk_size = 1000
|
||||
self.indexing_interval = 200
|
||||
|
||||
self.lines_count = 0
|
||||
self.lines_words = {}
|
||||
@ -96,11 +92,18 @@ end
|
||||
---Indexing buffer
|
||||
function buffer.index(self)
|
||||
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
for i = 1, self.lines_count do
|
||||
self.lines_words[i] = {}
|
||||
end
|
||||
-- NOTE: Pre-allocating self.lines_words here somehow wastes more memory, and
|
||||
-- not doing that doesn't have a visible effect on performance. Win-win.
|
||||
-- for i = 1, self.lines_count do
|
||||
-- self.lines_words[i] = {}
|
||||
-- end
|
||||
|
||||
self:index_range_async(0, self.lines_count)
|
||||
if self.opts.indexing_interval <= 0 then
|
||||
self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
|
||||
self:mark_all_lines_dirty()
|
||||
else
|
||||
self:index_range_async(0, self.lines_count, self.opts.indexing_chunk_size)
|
||||
end
|
||||
end
|
||||
|
||||
--- Workaround for https://github.com/neovim/neovim/issues/16729
|
||||
@ -112,30 +115,52 @@ function buffer.safe_buf_call(self, callback)
|
||||
end
|
||||
end
|
||||
|
||||
function buffer.index_range(self, range_start, range_end)
|
||||
--- sync algorithm
|
||||
function buffer.index_range(self, range_start, range_end, chunk_size)
|
||||
self:safe_buf_call(function()
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
|
||||
for i, line in ipairs(lines) do
|
||||
self:index_line(range_start + i, line)
|
||||
if chunk_size < 1 then
|
||||
chunk_size = range_end - range_start
|
||||
end
|
||||
local chunk_start = range_start
|
||||
while chunk_start < range_end do
|
||||
local chunk_end = math.min(chunk_start + chunk_size, range_end)
|
||||
-- For some reason requesting line arrays multiple times in chunks leads
|
||||
-- to much better memory usage than doing that in one big array, which is
|
||||
-- why the sync algorithm has better memory usage than the async one.
|
||||
local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
|
||||
for i, line in ipairs(chunk_lines) do
|
||||
self:index_line(chunk_start + i, line)
|
||||
end
|
||||
chunk_start = chunk_end
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
function buffer.index_range_async(self, range_start, range_end)
|
||||
--- async algorithm
|
||||
function buffer.index_range_async(self, range_start, range_end, chunk_size)
|
||||
if chunk_size < 1 then
|
||||
chunk_size = range_end - range_start
|
||||
end
|
||||
local chunk_start = range_start
|
||||
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
|
||||
-- This flag prevents vim.schedule() callbacks from piling up in the queue
|
||||
-- when the indexing interval is very short.
|
||||
local scheduled = false
|
||||
|
||||
self.timer = vim.loop.new_timer()
|
||||
self.timer:start(
|
||||
0,
|
||||
self.indexing_interval,
|
||||
vim.schedule_wrap(function()
|
||||
self.timer:start(0, self.opts.indexing_interval, function()
|
||||
if scheduled then
|
||||
return
|
||||
end
|
||||
scheduled = true
|
||||
vim.schedule(function()
|
||||
scheduled = false
|
||||
if self.closed then
|
||||
return
|
||||
end
|
||||
|
||||
local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end)
|
||||
local chunk_end = math.min(chunk_start + chunk_size, range_end)
|
||||
self:safe_buf_call(function()
|
||||
for linenr = chunk_start + 1, chunk_end do
|
||||
self:index_line(linenr, lines[linenr])
|
||||
@ -149,7 +174,7 @@ function buffer.index_range_async(self, range_start, range_end)
|
||||
self:stop_indexing_timer()
|
||||
end
|
||||
end)
|
||||
)
|
||||
end)
|
||||
end
|
||||
|
||||
--- watch
|
||||
@ -219,7 +244,7 @@ function buffer.watch(self)
|
||||
self.lines_count = new_lines_count
|
||||
|
||||
-- replace lines
|
||||
self:index_range(first_line, new_last_line)
|
||||
self:index_range(first_line, new_last_line, self.opts.indexing_chunk_size)
|
||||
|
||||
if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then
|
||||
self.unique_words_curr_line_dirty = true
|
||||
@ -242,9 +267,11 @@ function buffer.watch(self)
|
||||
-- because tables of all lines can be assumed to be fresh.
|
||||
local new_lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
if new_lines_count > self.lines_count then -- append
|
||||
for i = self.lines_count + 1, new_lines_count do
|
||||
self.lines_words[i] = {}
|
||||
end
|
||||
-- Again, no need to pre-allocate, index_line will append new lines
|
||||
-- itself.
|
||||
-- for i = self.lines_count + 1, new_lines_count do
|
||||
-- self.lines_words[i] = {}
|
||||
-- end
|
||||
elseif new_lines_count < self.lines_count then -- remove
|
||||
for i = self.lines_count, new_lines_count + 1, -1 do
|
||||
self.lines_words[i] = nil
|
||||
@ -252,7 +279,7 @@ function buffer.watch(self)
|
||||
end
|
||||
self.lines_count = new_lines_count
|
||||
|
||||
self:index_range(0, self.lines_count)
|
||||
self:index_range(0, self.lines_count, self.opts.indexing_chunk_size)
|
||||
self:mark_all_lines_dirty()
|
||||
self.words_distances_dirty = true
|
||||
end,
|
||||
|
@ -4,6 +4,8 @@ local buffer = require('cmp_buffer.buffer')
|
||||
---@field public keyword_length number
|
||||
---@field public keyword_pattern string
|
||||
---@field public get_bufnrs fun(): number[]
|
||||
---@field public indexing_chunk_size number
|
||||
---@field public indexing_interval number
|
||||
|
||||
---@type cmp_buffer.Options
|
||||
local defaults = {
|
||||
@ -12,6 +14,8 @@ local defaults = {
|
||||
get_bufnrs = function()
|
||||
return { vim.api.nvim_get_current_buf() }
|
||||
end,
|
||||
indexing_chunk_size = 1000,
|
||||
indexing_interval = 200,
|
||||
}
|
||||
|
||||
local source = {}
|
||||
@ -29,6 +33,8 @@ source._validate_options = function(_, params)
|
||||
keyword_length = { opts.keyword_length, 'number' },
|
||||
keyword_pattern = { opts.keyword_pattern, 'string' },
|
||||
get_bufnrs = { opts.get_bufnrs, 'function' },
|
||||
indexing_chunk_size = { opts.indexing_chunk_size, 'number' },
|
||||
indexing_interval = { opts.indexing_interval, 'number' },
|
||||
})
|
||||
return opts
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user