mirror of
https://github.com/hrsh7th/cmp-buffer
synced 2025-05-06 10:09:52 +00:00
Merge pull request #23 from dmitmel/synchronous-indexing
Improve reliability of async indexing while the user is editing the file, implement the memory usage optimization for the indexer, make its speed configurable
This commit is contained in:
commit
a01cfeca70
63
README.md
63
README.md
@ -109,6 +109,20 @@ end
|
||||
```
|
||||
|
||||
|
||||
### indexing_interval (type: number)
|
||||
|
||||
_Default:_ `100`
|
||||
|
||||
Advanced option. See the section [Indexing](#indexing).
|
||||
|
||||
|
||||
### indexing_batch_size (type: number)
|
||||
|
||||
_Default:_ `1000`
|
||||
|
||||
Advanced option. See the section [Indexing](#indexing).
|
||||
|
||||
|
||||
## Locality bonus comparator (distance-based sorting)
|
||||
|
||||
This source also provides a comparator function which uses information from the word indexer
|
||||
@ -133,3 +147,52 @@ cmp.setup({
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
|
||||
## Indexing
|
||||
|
||||
When a buffer is opened, this source first has to scan all lines in the buffer, match all words
|
||||
and store all of their occurrences. This process is called _indexing_. When actually editing the
|
||||
text in the buffer, the index of words is kept up-to-date with changes to the buffer's contents,
|
||||
this is called _watching_. It is done by re-running the indexer on just the changed lines.
|
||||
Indexing happens completely asynchronously in the background, unlike watching, which must be performed
|
||||
synchronously to ensure that the index of words is kept perfectly in-sync with the lines in the
|
||||
buffer. However, most of the time this will not be a problem since many typical text edit
|
||||
operations affect only one or two lines, unless you are pasting a 1000-line snippet.
|
||||
|
||||
_Note that you can freely edit the buffer while it is being indexed_: the underlying algorithm is
|
||||
written in such a way that your changes will not break the index or cause errors. If a crash does
|
||||
happen - it is a bug, so please report it.
|
||||
|
||||
The speed of indexing is configurable with two options: `indexing_interval` and
|
||||
`indexing_batch_size`. Essentially, when indexing, a timer is started, which pulls a batch of
|
||||
`indexing_batch_size` lines from the buffer, scans them for words, and repeats after
|
||||
`indexing_interval` milliseconds. Decreasing the interval and/or increasing the batch size will make
|
||||
the indexer faster, but at the expense of higher CPU usage and more lag when editing the file
|
||||
while indexing is still in progress. Setting `indexing_batch_size` to zero or a negative value will switch
|
||||
the indexer to the "synchronous" mode: this will process all lines in one go, take less time in
|
||||
total (since no other code will be running on the Lua thread), but with the obvious downside that
|
||||
the editor UI will be blocked.
|
||||
|
||||
### Performance on large text files
|
||||
|
||||
This source has been tested on code files of a few megabytes in size (5-10) and contains
|
||||
optimizations for them, however, the indexed words can still take up tens of megabytes of RAM if
|
||||
the file is large. It also currently has trouble with files that have very long lines; see issue
|
||||
[#13](https://github.com/hrsh7th/cmp-buffer/issues/13).
|
||||
|
||||
So, if you wish to avoid accidentally running this source on big files, you can tweak
|
||||
`get_bufnrs`, for example like this:
|
||||
|
||||
```lua
|
||||
get_bufnrs = function()
|
||||
local buf = vim.api.nvim_get_current_buf()
|
||||
local byte_size = vim.api.nvim_buf_get_offset(buf, vim.api.nvim_buf_line_count(buf))
|
||||
if byte_size > 1024 * 1024 then -- 1 Megabyte max
|
||||
return {}
|
||||
end
|
||||
return { buf }
|
||||
end
|
||||
```
|
||||
|
||||
Of course, this snippet can be combined with any other recipes for `get_bufnrs`.
|
||||
|
@ -1,11 +1,18 @@
|
||||
local timer = require('cmp_buffer.timer')
|
||||
|
||||
---Remove every entry from a table in place, keeping the table object itself
---(and therefore any references to it) intact.
---@param tbl table
local function clear_table(tbl)
  -- Assigning nil to an existing key during a pairs() traversal is allowed.
  for key, _ in pairs(tbl) do
    tbl[key] = nil
  end
end
|
||||
|
||||
---@class cmp_buffer.Buffer
|
||||
---@field public bufnr number
|
||||
---@field public opts cmp_buffer.Options
|
||||
---@field public regex any
|
||||
---@field public indexing_chunk_size number
|
||||
---@field public indexing_interval number
|
||||
---@field public timer any|nil
|
||||
---@field public timer cmp_buffer.Timer
|
||||
---@field public lines_count number
|
||||
---@field public timer_current_line number
|
||||
---@field public lines_words table<number, string[]>
|
||||
---@field public unique_words_curr_line table<string, boolean>
|
||||
---@field public unique_words_other_lines table<string, boolean>
|
||||
@ -20,6 +27,10 @@
|
||||
---@field public words_distances_dirty boolean
|
||||
local buffer = {}
|
||||
|
||||
-- For some reason requesting this many lines multiple times in chunks leads to
|
||||
-- much better memory usage than fetching the entire file in one go.
|
||||
buffer.GET_LINES_CHUNK_SIZE = 1000
|
||||
|
||||
---Create new buffer object
|
||||
---@param bufnr number
|
||||
---@param opts cmp_buffer.Options
|
||||
@ -28,16 +39,15 @@ function buffer.new(bufnr, opts)
|
||||
local self = setmetatable({}, { __index = buffer })
|
||||
|
||||
self.bufnr = bufnr
|
||||
self.timer = nil
|
||||
self.timer = timer.new()
|
||||
self.closed = false
|
||||
self.on_close_cb = nil
|
||||
|
||||
self.opts = opts
|
||||
self.regex = vim.regex(self.opts.keyword_pattern)
|
||||
self.indexing_chunk_size = 1000
|
||||
self.indexing_interval = 200
|
||||
|
||||
self.lines_count = 0
|
||||
self.timer_current_line = -1
|
||||
self.lines_words = {}
|
||||
|
||||
self.unique_words_curr_line = {}
|
||||
@ -58,8 +68,11 @@ end
|
||||
function buffer.close(self)
|
||||
self.closed = true
|
||||
self:stop_indexing_timer()
|
||||
self.timer:close()
|
||||
self.timer = nil
|
||||
|
||||
self.lines_count = 0
|
||||
self.timer_current_line = -1
|
||||
self.lines_words = {}
|
||||
|
||||
self.unique_words_curr_line = {}
|
||||
@ -79,11 +92,8 @@ function buffer.close(self)
|
||||
end
|
||||
|
||||
function buffer.stop_indexing_timer(self)
|
||||
if self.timer and not self.timer:is_closing() then
|
||||
self.timer:stop()
|
||||
self.timer:close()
|
||||
end
|
||||
self.timer = nil
|
||||
self.timer_current_line = -1
|
||||
end
|
||||
|
||||
function buffer.mark_all_lines_dirty(self)
|
||||
@ -91,16 +101,7 @@ function buffer.mark_all_lines_dirty(self)
|
||||
self.unique_words_other_lines_dirty = true
|
||||
self.last_edit_first_line = 0
|
||||
self.last_edit_last_line = 0
|
||||
end
|
||||
|
||||
---Indexing buffer
|
||||
function buffer.index(self)
|
||||
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
for i = 1, self.lines_count do
|
||||
self.lines_words[i] = {}
|
||||
end
|
||||
|
||||
self:index_range_async(0, self.lines_count)
|
||||
self.words_distances_dirty = true
|
||||
end
|
||||
|
||||
--- Workaround for https://github.com/neovim/neovim/issues/16729
|
||||
@ -112,48 +113,67 @@ function buffer.safe_buf_call(self, callback)
|
||||
end
|
||||
end
|
||||
|
||||
function buffer.index_range(self, range_start, range_end)
|
||||
function buffer.index_range(self, range_start, range_end, skip_already_indexed)
|
||||
self:safe_buf_call(function()
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
|
||||
for i, line in ipairs(lines) do
|
||||
self:index_line(range_start + i, line)
|
||||
local chunk_size = self.GET_LINES_CHUNK_SIZE
|
||||
local chunk_start = range_start
|
||||
while chunk_start < range_end do
|
||||
local chunk_end = math.min(chunk_start + chunk_size, range_end)
|
||||
local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
|
||||
for i, line in ipairs(chunk_lines) do
|
||||
if not skip_already_indexed or not self.lines_words[chunk_start + i] then
|
||||
self:index_line(chunk_start + i, line)
|
||||
end
|
||||
end
|
||||
chunk_start = chunk_end
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
function buffer.index_range_async(self, range_start, range_end)
|
||||
local chunk_start = range_start
|
||||
function buffer.start_indexing_timer(self)
|
||||
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
self.timer_current_line = 0
|
||||
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
|
||||
|
||||
self.timer = vim.loop.new_timer()
|
||||
self.timer:start(
|
||||
0,
|
||||
self.indexing_interval,
|
||||
vim.schedule_wrap(function()
|
||||
-- Negative values result in an integer overflow in luv (vim.loop), and zero
|
||||
-- disables timer repeat, so only intervals of at least 1 are valid.
|
||||
local interval = math.max(1, self.opts.indexing_interval)
|
||||
self.timer:start(0, interval, function()
|
||||
if self.closed then
|
||||
self:stop_indexing_timer()
|
||||
return
|
||||
end
|
||||
|
||||
local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end)
|
||||
self:safe_buf_call(function()
|
||||
for linenr = chunk_start + 1, chunk_end do
|
||||
self:index_line(linenr, lines[linenr])
|
||||
end
|
||||
end)
|
||||
chunk_start = chunk_end
|
||||
self:mark_all_lines_dirty()
|
||||
self.words_distances_dirty = true
|
||||
-- Note that the async indexer is designed to not break even if the user is
|
||||
-- editing the file while it is in the process of being indexed. Because
|
||||
-- the indexing in watcher must use the synchronous algorithm, we assume
|
||||
-- that the data already present in self.lines_words is correct and
|
||||
-- doesn't need refreshing here because even if we do receive text from
|
||||
-- nvim_buf_get_lines different from what the watcher has seen so far, it
|
||||
-- (the watcher) will catch up on the next on_lines event.
|
||||
|
||||
if chunk_end >= range_end then
|
||||
-- Skip over the already indexed lines
|
||||
while self.lines_words[self.timer_current_line + 1] do
|
||||
self.timer_current_line = self.timer_current_line + 1
|
||||
end
|
||||
|
||||
local batch_start = self.timer_current_line
|
||||
local batch_size = self.opts.indexing_batch_size
|
||||
-- NOTE: self.lines_count may be modified by the indexer.
|
||||
local batch_end = batch_size >= 1 and math.min(batch_start + batch_size, self.lines_count) or self.lines_count
|
||||
if batch_end >= self.lines_count then
|
||||
self:stop_indexing_timer()
|
||||
end
|
||||
self.timer_current_line = batch_end
|
||||
self:mark_all_lines_dirty()
|
||||
|
||||
self:index_range(batch_start, batch_end, true)
|
||||
end)
|
||||
)
|
||||
end
|
||||
|
||||
--- watch
|
||||
function buffer.watch(self)
|
||||
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
|
||||
-- NOTE: As far as I know, indexing in watching can't be done asynchronously
|
||||
-- because even built-in commands generate multiple consecutive `on_lines`
|
||||
-- events, and I'm not even mentioning plugins here. To get accurate results
|
||||
@ -218,8 +238,24 @@ function buffer.watch(self)
|
||||
end
|
||||
self.lines_count = new_lines_count
|
||||
|
||||
-- replace lines
|
||||
self:index_range(first_line, new_last_line)
|
||||
-- This branch is support code for handling cases when the user is
|
||||
-- editing the buffer while the async indexer is running. It solves the
|
||||
-- problem that if new lines are inserted or old lines are deleted, the
|
||||
-- indexes of each subsequent line will change, and so the indexer
|
||||
-- current position must be adjusted to not accidentally skip any lines.
|
||||
if self.timer:is_active() then
|
||||
if first_line <= self.timer_current_line and self.timer_current_line < old_last_line then
|
||||
-- The indexer was in the area of the current text edit. We will
|
||||
-- synchronously index this area in a moment, so the indexer
|
||||
-- should resume from right after the edit range.
|
||||
self.timer_current_line = new_last_line
|
||||
elseif self.timer_current_line >= old_last_line then
|
||||
-- The indexer was somewhere past the current text edit. This means
|
||||
-- that the line numbers could have changed, and the indexing
|
||||
-- position must be adjusted accordingly.
|
||||
self.timer_current_line = self.timer_current_line + delta
|
||||
end
|
||||
end
|
||||
|
||||
if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then
|
||||
self.unique_words_curr_line_dirty = true
|
||||
@ -231,6 +267,9 @@ function buffer.watch(self)
|
||||
self.last_edit_last_line = new_last_line
|
||||
|
||||
self.words_distances_dirty = true
|
||||
|
||||
-- replace lines
|
||||
self:index_range(first_line, new_last_line)
|
||||
end,
|
||||
|
||||
on_reload = function(_, _)
|
||||
@ -238,23 +277,10 @@ function buffer.watch(self)
|
||||
return true
|
||||
end
|
||||
|
||||
-- The logic for adjusting lines list on buffer reloads is much simpler
|
||||
-- because tables of all lines can be assumed to be fresh.
|
||||
local new_lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
if new_lines_count > self.lines_count then -- append
|
||||
for i = self.lines_count + 1, new_lines_count do
|
||||
self.lines_words[i] = {}
|
||||
end
|
||||
elseif new_lines_count < self.lines_count then -- remove
|
||||
for i = self.lines_count, new_lines_count + 1, -1 do
|
||||
self.lines_words[i] = nil
|
||||
end
|
||||
end
|
||||
self.lines_count = new_lines_count
|
||||
clear_table(self.lines_words)
|
||||
|
||||
self:index_range(0, self.lines_count)
|
||||
self:mark_all_lines_dirty()
|
||||
self.words_distances_dirty = true
|
||||
self:stop_indexing_timer()
|
||||
self:start_indexing_timer()
|
||||
end,
|
||||
|
||||
on_detach = function(_, _)
|
||||
@ -266,12 +292,6 @@ function buffer.watch(self)
|
||||
})
|
||||
end
|
||||
|
||||
local function clear_table(tbl)
|
||||
for k in pairs(tbl) do
|
||||
tbl[k] = nil
|
||||
end
|
||||
end
|
||||
|
||||
---@param linenr number
|
||||
---@param line string
|
||||
function buffer.index_line(self, linenr, line)
|
||||
|
@ -4,6 +4,8 @@ local buffer = require('cmp_buffer.buffer')
|
||||
---@field public keyword_length number
|
||||
---@field public keyword_pattern string
|
||||
---@field public get_bufnrs fun(): number[]
|
||||
---@field public indexing_batch_size number
|
||||
---@field public indexing_interval number
|
||||
|
||||
---@type cmp_buffer.Options
|
||||
local defaults = {
|
||||
@ -12,6 +14,8 @@ local defaults = {
|
||||
get_bufnrs = function()
|
||||
return { vim.api.nvim_get_current_buf() }
|
||||
end,
|
||||
indexing_batch_size = 1000,
|
||||
indexing_interval = 100,
|
||||
}
|
||||
|
||||
local source = {}
|
||||
@ -29,6 +33,8 @@ source._validate_options = function(_, params)
|
||||
keyword_length = { opts.keyword_length, 'number' },
|
||||
keyword_pattern = { opts.keyword_pattern, 'string' },
|
||||
get_bufnrs = { opts.get_bufnrs, 'function' },
|
||||
indexing_batch_size = { opts.indexing_batch_size, 'number' },
|
||||
indexing_interval = { opts.indexing_interval, 'number' },
|
||||
})
|
||||
return opts
|
||||
end
|
||||
@ -44,7 +50,7 @@ source.complete = function(self, params, callback)
|
||||
local processing = false
|
||||
local bufs = self:_get_buffers(opts)
|
||||
for _, buf in ipairs(bufs) do
|
||||
if buf.timer then
|
||||
if buf.timer:is_active() then
|
||||
processing = true
|
||||
break
|
||||
end
|
||||
@ -76,6 +82,7 @@ source.complete = function(self, params, callback)
|
||||
end
|
||||
|
||||
---@param opts cmp_buffer.Options
|
||||
---@return cmp_buffer.Buffer[]
|
||||
source._get_buffers = function(self, opts)
|
||||
local buffers = {}
|
||||
for _, bufnr in ipairs(opts.get_bufnrs()) do
|
||||
@ -84,7 +91,7 @@ source._get_buffers = function(self, opts)
|
||||
new_buf.on_close_cb = function()
|
||||
self.buffers[bufnr] = nil
|
||||
end
|
||||
new_buf:index()
|
||||
new_buf:start_indexing_timer()
|
||||
new_buf:watch()
|
||||
self.buffers[bufnr] = new_buf
|
||||
end
|
||||
|
68
lua/cmp_buffer/timer.lua
Normal file
68
lua/cmp_buffer/timer.lua
Normal file
@ -0,0 +1,68 @@
|
||||
---This timer matches the semantics of setInterval and clearInterval of
|
||||
---Javascript. It provides a more reliable alternative to vim.loop.timer_start
|
||||
---with a callback wrapped into a vim.schedule call by addressing two problems:
|
||||
---1. Scheduled callbacks are invoked less frequently than a libuv timer with a
|
||||
--- small interval (1-5ms). This causes those callbacks to fill up the queue
|
||||
--- in the event loop, and so the callback function may get invoked multiple
|
||||
--- times on one event loop tick. In contrast, Javascript's setInterval
|
||||
--- guarantees that the callback is not invoked more frequently than the
|
||||
--- interval.
|
||||
---2. When a libuv timer is stopped with vim.loop.timer_stop, it doesn't affect
|
||||
--- the callbacks that have already been scheduled. So timer_stop will not
|
||||
--- immediately stop the timer, the actual callback function will run one
|
||||
--- more time until it is finally stopped. This implementation ensures that
|
||||
--- timer_stop prevents any subsequent invocations of the callback.
|
||||
---
|
||||
---@class cmp_buffer.Timer
|
||||
---@field public handle any
|
||||
---@field private callback_wrapper_instance fun()|nil
|
||||
local timer = {}
|
||||
|
||||
---Create a new timer object. A timer handle is obtained from
---vim.loop.new_timer() right away; no callback is registered until start()
---is called.
---@return cmp_buffer.Timer
function timer.new()
  local instance = {
    handle = vim.loop.new_timer(),
    -- Identity of the wrapper currently allowed to run; nil means "none".
    callback_wrapper_instance = nil,
  }
  return setmetatable(instance, { __index = timer })
end
|
||||
|
||||
---Start the timer with the given callback. `timeout_ms` and `repeat_ms` are
---forwarded unchanged to the underlying handle's start(); `callback` is
---always invoked through vim.schedule.
---@param timeout_ms number
---@param repeat_ms number
---@param callback fun()
function timer:start(timeout_ms, repeat_ms, callback)
  -- True while one invocation is queued via vim.schedule but has not run yet.
  -- Ticks arriving in that window are dropped, so the callback cannot pile up
  -- in the event loop queue (problem 1).
  local pending = false

  -- A brand new closure is created on every start() call and doubles as an
  -- identity token: once start() is called again or stop() clears the stored
  -- instance, the closure of the previous run no longer matches and its
  -- queued invocations are discarded.
  local on_tick

  local function run_scheduled()
    pending = false
    -- Skip if a different callback was set or the timer has been stopped in
    -- the meantime.
    if self.callback_wrapper_instance == on_tick then
      callback()
    end
  end

  on_tick = function()
    if not pending then
      pending = true
      vim.schedule(run_scheduled)
    end
  end

  self.handle:start(timeout_ms, repeat_ms, on_tick)
  self.callback_wrapper_instance = on_tick
end
|
||||
|
||||
---Stop the timer. Besides stopping the underlying handle, the stored wrapper
---is cleared so that an invocation which is already queued through
---vim.schedule will see a mismatch and not call the user callback.
function timer:stop()
  local handle = self.handle
  handle:stop()
  self.callback_wrapper_instance = nil
end
|
||||
|
||||
---Report whether the underlying handle is currently active, as answered by
---the handle's own is_active().
function timer:is_active()
  local handle = self.handle
  return handle:is_active()
end
|
||||
|
||||
---Release the underlying handle. The timer must not be started again
---afterwards.
---
---Unlike a bare handle:close(), this is safe to call more than once and also
---cancels a callback invocation that is already queued through vim.schedule.
function timer:close()
  -- Clearing the wrapper makes any queued invocation fail its identity check
  -- in start(), so the user callback never runs after the timer is closed.
  self.callback_wrapper_instance = nil
  -- luv raises an error when close() is called on a handle that is already
  -- closing, so a repeated close() must be turned into a no-op.
  if not self.handle:is_closing() then
    self.handle:close()
  end
end
|
||||
|
||||
return timer
|
Loading…
Reference in New Issue
Block a user