mirror of
https://github.com/hrsh7th/cmp-buffer
synced 2025-04-19 05:45:14 +00:00
optimize buffer indexing, add sync mode, add options for tweaking it
This commit is contained in:
parent
d1ca295ce5
commit
846c7230ff
@ -1,10 +1,14 @@
|
||||
---@class cmp_buffer.Buffer
|
||||
---@field public bufnr number
|
||||
---@field public regexes any[]
|
||||
---@field public regex any
|
||||
---@field public length number
|
||||
---@field public pattern string
|
||||
---@field public indexing_chunk_size number
|
||||
---@field public indexing_interval number
|
||||
---@field public timer any|nil
|
||||
---@field public words table<number, string[]>
|
||||
---@field public lines_words table<number, string[]>
|
||||
---@field public unique_words table<string, boolean>
|
||||
---@field public unique_words_dirty boolean
|
||||
---@field public processing boolean
|
||||
local buffer = {}
|
||||
|
||||
@ -12,15 +16,22 @@ local buffer = {}
|
||||
---@param bufnr number
|
||||
---@param length number
|
||||
---@param pattern string
|
||||
---@param indexing_chunk_size number
|
||||
---@param indexing_interval number
|
||||
---@return cmp_buffer.Buffer
|
||||
function buffer.new(bufnr, length, pattern)
|
||||
function buffer.new(bufnr, length, pattern, indexing_chunk_size, indexing_interval)
|
||||
local self = setmetatable({}, { __index = buffer })
|
||||
self.bufnr = bufnr
|
||||
self.regexes = {}
|
||||
self.regex = vim.regex(pattern)
|
||||
self.length = length
|
||||
self.pattern = pattern
|
||||
self.indexing_chunk_size = indexing_chunk_size
|
||||
self.indexing_interval = indexing_interval
|
||||
self.timer = nil
|
||||
self.words = {}
|
||||
self.lines_count = 0
|
||||
self.lines_words = {}
|
||||
self.unique_words = {}
|
||||
self.unique_words_dirty = true
|
||||
self.processing = false
|
||||
return self
|
||||
end
|
||||
@ -32,28 +43,75 @@ function buffer.close(self)
|
||||
self.timer:close()
|
||||
self.timer = nil
|
||||
end
|
||||
self.words = {}
|
||||
self.lines_count = 0
|
||||
self.lines_words = {}
|
||||
self.unique_words = {}
|
||||
self.unique_words_dirty = false
|
||||
end
|
||||
|
||||
---Indexing buffer
|
||||
function buffer.index(self)
|
||||
self.processing = true
|
||||
local index = 1
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, 0, -1, false)
|
||||
|
||||
self.lines_count = vim.api.nvim_buf_line_count(self.bufnr)
|
||||
local chunk_max_size = self.indexing_chunk_size
|
||||
if chunk_max_size < 1 then
|
||||
-- Index all lines in one go.
|
||||
chunk_max_size = self.lines_count
|
||||
end
|
||||
local chunk_start = 0
|
||||
|
||||
if self.indexing_interval <= 0 then
|
||||
-- sync algorithm
|
||||
|
||||
vim.api.nvim_buf_call(self.bufnr, function()
|
||||
while chunk_start < self.lines_count do
|
||||
local chunk_end = math.min(chunk_start + chunk_max_size, self.lines_count)
|
||||
-- For some reason requesting line arrays multiple times in chunks
|
||||
-- leads to much better memory usage than doing that in one big array,
|
||||
-- which is why the sync algorithm has better memory usage than the
|
||||
-- async one.
|
||||
local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true)
|
||||
for linenr = chunk_start + 1, chunk_end do
|
||||
self.lines_words[linenr] = {}
|
||||
self:index_line(linenr, chunk_lines[linenr - chunk_start])
|
||||
end
|
||||
chunk_start = chunk_end
|
||||
end
|
||||
end)
|
||||
|
||||
self:rebuild_unique_words()
|
||||
|
||||
self.processing = false
|
||||
return
|
||||
end
|
||||
|
||||
-- async algorithm
|
||||
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, 0, -1, true)
|
||||
-- This flag prevents vim.schedule() callbacks from piling up in the queue
|
||||
-- when the indexing interval is very short.
|
||||
local scheduled = false
|
||||
|
||||
self.timer = vim.loop.new_timer()
|
||||
self.timer:start(
|
||||
0,
|
||||
200,
|
||||
vim.schedule_wrap(function()
|
||||
local chunk = math.min(index + 1000, #lines)
|
||||
self.timer:start(0, self.indexing_interval, function()
|
||||
if scheduled then
|
||||
return
|
||||
end
|
||||
scheduled = true
|
||||
vim.schedule(function()
|
||||
scheduled = false
|
||||
|
||||
local chunk_end = math.min(chunk_start + chunk_max_size, self.lines_count)
|
||||
vim.api.nvim_buf_call(self.bufnr, function()
|
||||
for i = index, chunk do
|
||||
self:index_line(i, lines[i] or '')
|
||||
for linenr = chunk_start + 1, chunk_end do
|
||||
self.lines_words[linenr] = {}
|
||||
self:index_line(linenr, lines[linenr])
|
||||
end
|
||||
end)
|
||||
index = chunk + 1
|
||||
chunk_start = chunk_end
|
||||
|
||||
if chunk >= #lines then
|
||||
if chunk_end >= self.lines_count then
|
||||
if self.timer then
|
||||
self.timer:stop()
|
||||
self.timer:close()
|
||||
@ -62,88 +120,127 @@ function buffer.index(self)
|
||||
self.processing = false
|
||||
end
|
||||
end)
|
||||
)
|
||||
end)
|
||||
end
|
||||
|
||||
-- See below.
|
||||
local shared_marker_table_for_preallocation = {}
|
||||
|
||||
--- watch
|
||||
function buffer.watch(self)
|
||||
-- NOTE: As far as I know, indexing in watching can't be done asynchronously
|
||||
-- because even built-in commands generate multiple consecutive `on_lines`
|
||||
-- events, and I'm not even mentioning plugins here. To get accurate results
|
||||
-- we would have to either re-index the entire file on throttled events (slow
|
||||
-- and loses the benefit of on_lines watching), or put the events in a
|
||||
-- queue, which would complicate the plugin a lot. Plus, most changes which
|
||||
-- trigger this event will be from regular editing, and so 99% of the time
|
||||
-- they will affect only 1-2 lines.
|
||||
vim.api.nvim_buf_attach(self.bufnr, false, {
|
||||
on_lines = vim.schedule_wrap(function(_, _, _, firstline, old_lastline, new_lastline, _, _, _)
|
||||
if not vim.api.nvim_buf_is_valid(self.bufnr) then
|
||||
self:close()
|
||||
-- NOTE: line indexes are 0-based and the last line is not inclusive.
|
||||
on_lines = function(_, _, _, first_line, old_last_line, new_last_line, _, _, _)
|
||||
if not vim.api.nvim_buf_is_loaded(self.bufnr) then
|
||||
return true
|
||||
end
|
||||
|
||||
-- append
|
||||
for i = old_lastline, new_lastline - 1 do
|
||||
table.insert(self.words, i + 1, {})
|
||||
end
|
||||
|
||||
-- remove
|
||||
for _ = new_lastline, old_lastline - 1 do
|
||||
table.remove(self.words, new_lastline + 1)
|
||||
local delta = new_last_line - old_last_line
|
||||
local new_lines_count = self.lines_count + delta
|
||||
if delta > 0 then -- append
|
||||
-- Explicitly reserve more slots in the array part of the lines table,
|
||||
-- all of them will be filled in the next loop, but in reverse order
|
||||
-- (which is why I am concerned about preallocation). Why is there no
|
||||
-- built-in function to do this in Lua???
|
||||
for i = self.lines_count + 1, new_lines_count do
|
||||
self.lines_words[i] = shared_marker_table_for_preallocation
|
||||
end
|
||||
-- Move forwards the unchanged elements in the tail part.
|
||||
for i = self.lines_count, old_last_line + 1, -1 do
|
||||
self.lines_words[i + delta] = self.lines_words[i]
|
||||
end
|
||||
-- Fill in new tables for the added lines.
|
||||
for i = old_last_line + 1, new_last_line do
|
||||
self.lines_words[i] = {}
|
||||
end
|
||||
elseif delta < 0 then -- remove
|
||||
-- Move backwards the unchanged elements in the tail part.
|
||||
for i = old_last_line + 1, self.lines_count do
|
||||
self.lines_words[i + delta] = self.lines_words[i]
|
||||
end
|
||||
-- Remove (already copied) tables from the end, in reverse order, so
|
||||
-- that we don't make holes in the lines table.
|
||||
for i = self.lines_count, new_lines_count + 1, -1 do
|
||||
self.lines_words[i] = nil
|
||||
end
|
||||
end
|
||||
self.lines_count = new_lines_count
|
||||
|
||||
-- replace lines
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, firstline, new_lastline, false)
|
||||
local lines = vim.api.nvim_buf_get_lines(self.bufnr, first_line, new_last_line, true)
|
||||
vim.api.nvim_buf_call(self.bufnr, function()
|
||||
for i, line in ipairs(lines) do
|
||||
if line then
|
||||
self:index_line(firstline + i, line or '')
|
||||
end
|
||||
self:index_line(first_line + i, line)
|
||||
end
|
||||
end)
|
||||
end),
|
||||
|
||||
self.unique_words_dirty = true
|
||||
end,
|
||||
|
||||
on_detach = function(_)
|
||||
self:close()
|
||||
end,
|
||||
})
|
||||
end
|
||||
|
||||
--- index_line
|
||||
function buffer.index_line(self, i, line)
|
||||
local words = {}
|
||||
---@param linenr number
|
||||
---@param line string
|
||||
function buffer.index_line(self, linenr, line)
|
||||
local words = self.lines_words[linenr]
|
||||
for k, _ in ipairs(words) do
|
||||
words[k] = nil
|
||||
end
|
||||
local word_i = 1
|
||||
|
||||
local buf = line
|
||||
while true do
|
||||
local s, e = self:matchstrpos(buf)
|
||||
if s then
|
||||
local word = string.sub(buf, s, e - 1)
|
||||
local remaining = line
|
||||
while #remaining > 0 do
|
||||
-- NOTE: Both start and end indexes here are 0-based (unlike Lua strings),
|
||||
-- and the end index is not inclusive.
|
||||
local match_start, match_end = self.regex:match_str(remaining)
|
||||
if match_start and match_end then
|
||||
local word = remaining:sub(match_start + 1, match_end)
|
||||
if #word >= self.length then
|
||||
table.insert(words, word)
|
||||
words[word_i] = word
|
||||
word_i = word_i + 1
|
||||
end
|
||||
end
|
||||
local new_buffer = string.sub(buf, e and e + 1 or 2)
|
||||
if buf == new_buffer then
|
||||
remaining = remaining:sub(match_end + 1)
|
||||
else
|
||||
break
|
||||
end
|
||||
buf = new_buffer
|
||||
end
|
||||
|
||||
self.words[i] = words
|
||||
end
|
||||
|
||||
--- get_words
|
||||
function buffer.get_words(self)
|
||||
local words = {}
|
||||
for _, line in ipairs(self.words) do
|
||||
-- NOTE: unique_words are rebuilt on-demand because it is common for the
|
||||
-- watcher callback to be fired VERY frequently, and a rebuild needs to go
|
||||
-- over ALL lines, not just the changed ones.
|
||||
if self.unique_words_dirty then
|
||||
self:rebuild_unique_words()
|
||||
end
|
||||
return self.unique_words
|
||||
end
|
||||
|
||||
--- rebuild_unique_words
|
||||
function buffer.rebuild_unique_words(self)
|
||||
for w, _ in pairs(self.unique_words) do
|
||||
self.unique_words[w] = nil
|
||||
end
|
||||
for _, line in ipairs(self.lines_words) do
|
||||
for _, w in ipairs(line) do
|
||||
table.insert(words, w)
|
||||
self.unique_words[w] = true
|
||||
end
|
||||
end
|
||||
return words
|
||||
end
|
||||
|
||||
--- matchstrpos
|
||||
function buffer.matchstrpos(self, text)
|
||||
local s, e = self:regex(self.pattern):match_str(text)
|
||||
if s == nil then
|
||||
return nil, nil
|
||||
end
|
||||
return s + 1, e + 1
|
||||
end
|
||||
|
||||
--- regex
|
||||
function buffer.regex(self, pattern)
|
||||
self.regexes[pattern] = self.regexes[pattern] or vim.regex(pattern)
|
||||
return self.regexes[pattern]
|
||||
self.unique_words_dirty = false
|
||||
end
|
||||
|
||||
return buffer
|
||||
|
@ -6,6 +6,8 @@ local defaults = {
|
||||
get_bufnrs = function()
|
||||
return { vim.api.nvim_get_current_buf() }
|
||||
end,
|
||||
indexing_chunk_size = 1000,
|
||||
indexing_interval = 200,
|
||||
}
|
||||
|
||||
local source = {}
|
||||
@ -16,34 +18,37 @@ source.new = function()
|
||||
return self
|
||||
end
|
||||
|
||||
source.get_keyword_pattern = function(_, params)
|
||||
source._validate_options = function(_, params)
|
||||
params.option = vim.tbl_deep_extend('keep', params.option, defaults)
|
||||
vim.validate({
|
||||
keyword_length = { params.option.keyword_length, 'number', '`opts.keyword_length` must be `number`' },
|
||||
keyword_pattern = { params.option.keyword_pattern, 'string', '`opts.keyword_pattern` must be `string`' },
|
||||
get_bufnrs = { params.option.get_bufnrs, 'function', '`opts.get_bufnrs` must be `function`' },
|
||||
keyword_length = { params.option.keyword_length, 'number' },
|
||||
keyword_pattern = { params.option.keyword_pattern, 'string' },
|
||||
get_bufnrs = { params.option.get_bufnrs, 'function' },
|
||||
indexing_chunk_size = { params.option.indexing_chunk_size, 'number' },
|
||||
indexing_interval = { params.option.indexing_interval, 'number' },
|
||||
})
|
||||
end
|
||||
|
||||
source.get_keyword_pattern = function(self, params)
|
||||
self:_validate_options(params)
|
||||
return params.option.keyword_pattern
|
||||
end
|
||||
|
||||
source.complete = function(self, params, callback)
|
||||
params.option = vim.tbl_deep_extend('keep', params.option, defaults)
|
||||
vim.validate({
|
||||
keyword_pattern = { params.option.keyword_pattern, 'string', '`opts.keyword_pattern` must be `string`' },
|
||||
get_bufnrs = { params.option.get_bufnrs, 'function', '`opts.get_bufnrs` must be `function`' },
|
||||
})
|
||||
self:_validate_options(params)
|
||||
|
||||
local processing = false
|
||||
for _, buf in ipairs(self:_get_buffers(params)) do
|
||||
local bufs = self:_get_buffers(params)
|
||||
for _, buf in ipairs(bufs) do
|
||||
processing = processing or buf.processing
|
||||
end
|
||||
|
||||
vim.defer_fn(vim.schedule_wrap(function()
|
||||
vim.defer_fn(function()
|
||||
local input = string.sub(params.context.cursor_before_line, params.offset)
|
||||
local items = {}
|
||||
local words = {}
|
||||
for _, buf in ipairs(self:_get_buffers(params)) do
|
||||
for _, word in ipairs(buf:get_words()) do
|
||||
for _, buf in ipairs(bufs) do
|
||||
for word, _ in pairs(buf:get_words()) do
|
||||
if not words[word] and input ~= word then
|
||||
words[word] = true
|
||||
table.insert(items, {
|
||||
@ -58,7 +63,7 @@ source.complete = function(self, params, callback)
|
||||
items = items,
|
||||
isIncomplete = processing,
|
||||
})
|
||||
end), processing and 100 or 0)
|
||||
end, processing and 100 or 0)
|
||||
end
|
||||
|
||||
--- _get_buffers
|
||||
@ -69,7 +74,9 @@ source._get_buffers = function(self, params)
|
||||
local new_buf = buffer.new(
|
||||
bufnr,
|
||||
params.option.keyword_length,
|
||||
params.option.keyword_pattern
|
||||
params.option.keyword_pattern,
|
||||
params.option.indexing_chunk_size,
|
||||
params.option.indexing_interval
|
||||
)
|
||||
new_buf:index()
|
||||
new_buf:watch()
|
||||
|
Loading…
Reference in New Issue
Block a user