---@class cmp_buffer.Buffer
---@field public bufnr number
---@field public regex any
---@field public length number
---@field public pattern string
---@field public indexing_chunk_size number
---@field public indexing_interval number
---@field public timer any|nil
---@field public lines_count number
---@field public lines_words table<number, string[]>
---@field public unique_words_curr_line table<string, boolean>
---@field public unique_words_other_lines table<string, boolean>
---@field public unique_words_curr_line_dirty boolean
---@field public unique_words_other_lines_dirty boolean
---@field public last_edit_first_line number
---@field public last_edit_last_line number
---@field public closed boolean
---@field public on_close_cb fun()|nil
local buffer = {} -- module table; also the `__index` prototype of every object made by `buffer.new`
|
|
|
|
|
|
|
|
---Build the word-index state object for one Neovim buffer.
---
---The object starts out with no indexed lines; nothing is read from the
---buffer until one of the indexing methods is called.
---@param bufnr number handle of the buffer this object tracks
---@param length number minimum byte length a match needs in order to be stored as a word
---@param pattern string Vim regular expression describing a word (compiled with `vim.regex`)
---@return cmp_buffer.Buffer
function buffer.new(bufnr, length, pattern)
  local instance = {
    bufnr = bufnr,

    -- Lifecycle state.
    timer = nil,
    closed = false,
    on_close_cb = nil,

    -- Word matching.
    regex = vim.regex(pattern),
    length = length,
    pattern = pattern,

    -- Asynchronous indexing: number of lines handled per timer tick, and the
    -- repeat value handed to the timer (milliseconds per the luv timer API).
    indexing_chunk_size = 1000,
    indexing_interval = 200,

    -- Per-line word lists; `lines_words[n]` belongs to the n-th (1-based) line.
    lines_count = 0,
    lines_words = {},

    -- Cached sets of unique words, split into "last edited range" and
    -- "everything else", together with their invalidation flags.
    unique_words_curr_line = {},
    unique_words_other_lines = {},
    unique_words_curr_line_dirty = true,
    unique_words_other_lines_dirty = true,
    last_edit_first_line = 0,
    last_edit_last_line = 0,
  }

  return setmetatable(instance, { __index = buffer })
end
|
|
|
|
|
|
|
|
---Shut the buffer object down.
---
---Flags the object as closed, cancels the indexing timer, drops everything
---that was indexed and finally invokes `on_close_cb` when one is set.
function buffer.close(self)
  self.closed = true
  self:stop_indexing_timer()

  -- Release all indexed data and reset the cache bookkeeping.
  self.lines_count, self.lines_words = 0, {}
  self.unique_words_curr_line, self.unique_words_other_lines = {}, {}
  self.unique_words_curr_line_dirty, self.unique_words_other_lines_dirty = false, false
  self.last_edit_first_line, self.last_edit_last_line = 0, 0

  -- The listener runs last, once the object is already in its closed state.
  local notify = self.on_close_cb
  if notify then
    notify()
  end
end
|
|
|
|
|
|
|
|
---Stop and dispose of the indexing timer, then forget it.
---
---Does nothing besides clearing `self.timer` when there is no timer or when
---the timer handle is already closing.
function buffer.stop_indexing_timer(self)
  local timer = self.timer
  local needs_disposal = timer and not timer:is_closing()
  if needs_disposal then
    timer:stop()
    timer:close()
  end
  self.timer = nil
end
|
|
|
|
|
2021-11-07 12:04:19 +00:00
|
|
|
---Invalidate both unique-word caches and reset the remembered edit range to
---`[0, 0)`, so that neither cache is considered up to date any more.
function buffer.mark_all_lines_dirty(self)
  self.unique_words_curr_line_dirty, self.unique_words_other_lines_dirty = true, true
  self.last_edit_first_line, self.last_edit_last_line = 0, 0
end
|
|
|
|
|
2021-08-08 08:38:47 +00:00
|
|
|
---Index the whole buffer.
---
---Every line immediately receives a fresh, empty word list; the lists are
---then filled in chunk by chunk through `index_range_async`.
function buffer.index(self)
  local total = vim.api.nvim_buf_line_count(self.bufnr)
  self.lines_count = total

  local slots = self.lines_words
  for lnum = 1, total do
    slots[lnum] = {}
  end

  self:index_range_async(0, total)
end
|
|
|
|
|
|
|
|
---Synchronously re-index the lines in `[range_start, range_end)`.
---@param range_start number 0-based first line (inclusive)
---@param range_end number 0-based last line (exclusive)
function buffer.index_range(self, range_start, range_end)
  local function reindex()
    local fetched = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)
    for offset = 1, #fetched do
      -- `fetched` is 1-based, so `range_start + offset` is the 1-based line
      -- number that `index_line` expects.
      self:index_line(range_start + offset, fetched[offset])
    end
  end

  -- NOTE(review): the work runs inside `nvim_buf_call`, presumably so that
  -- buffer-local options affect how the regex matches -- confirm.
  vim.api.nvim_buf_call(self.bufnr, reindex)
end
|
|
|
|
|
|
|
|
---Index the lines in `[range_start, range_end)` in the background.
---
---The text of the range is read once, up front. A repeating timer then
---processes `self.indexing_chunk_size` lines per tick until the end of the
---range is reached, at which point the timer is stopped.
---@param range_start number 0-based first line (inclusive)
---@param range_end number 0-based last line (exclusive)
function buffer.index_range_async(self, range_start, range_end)
  -- Never keep two indexing timers alive: overwriting `self.timer` without
  -- stopping the previous handle would leak it and leave it firing.
  self:stop_indexing_timer()

  local chunk_start = range_start

  -- Snapshot of the range; `lines[1]` is the text of 0-based line `range_start`.
  local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true)

  local timer = vim.loop.new_timer()
  self.timer = timer
  timer:start(
    0,
    self.indexing_interval,
    vim.schedule_wrap(function()
      -- A tick that was already scheduled may still arrive after the object
      -- was closed or after this timer was stopped/replaced; ignore it.
      if self.closed or self.timer ~= timer then
        return
      end

      local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end)
      vim.api.nvim_buf_call(self.bufnr, function()
        for linenr = chunk_start + 1, chunk_end do
          -- The slot can be gone if lines were removed while indexing was
          -- still in progress; there is nothing left to fill in that case.
          if self.lines_words[linenr] then
            -- `linenr` is an absolute 1-based line number, whereas `lines`
            -- starts at `range_start`, hence the offset.
            self:index_line(linenr, lines[linenr - range_start])
          end
        end
      end)
      chunk_start = chunk_end
      self:mark_all_lines_dirty()

      if chunk_end >= range_end then
        self:stop_indexing_timer()
      end
    end)
  )
end
|
|
|
|
|
|
|
|
---Attach to the buffer and keep the per-line word lists in sync with it.
---
---Registers `on_lines`, `on_reload` and `on_detach` handlers through
---`nvim_buf_attach`. Each handler returns `true` once the object is closed.
function buffer.watch(self)
  -- NOTE: As far as I know, indexing in watching can't be done asynchronously
  -- because even built-in commands generate multiple consecutive `on_lines`
  -- events, and I'm not even mentioning plugins here. To get accurate results
  -- we would have to either re-index the entire file on throttled events (slow
  -- and loses the benefit of on_lines watching), or put the events in a
  -- queue, which would complicate the plugin a lot. Plus, most changes which
  -- trigger this event will be from regular editing, and so 99% of the time
  -- they will affect only 1-2 lines.

  -- NOTE: line indexes are 0-based and the last line is not inclusive.
  local function handle_lines(_, _, _, first_line, old_last_line, new_last_line)
    if self.closed then
      return true
    end

    local slots = self.lines_words
    local shift = new_last_line - old_last_line
    local count_before = self.lines_count
    local count_after = count_before + shift

    if count_after == 0 then
      -- Clear. This branch protects against bugs after full-file deletion:
      -- the event then reports a new_last_line of zero, which is not true, a
      -- buffer always contains at least one empty line, only unloaded buffers
      -- contain zero lines.
      count_after = 1
      for lnum = count_before, 2, -1 do
        slots[lnum] = nil
      end
      slots[1] = {}
    elseif shift > 0 then
      -- Append. Explicitly reserve more slots in the array part of the lines
      -- table first; all of them are filled by the loops below, but in
      -- reverse order (which is why preallocation is a concern).
      for lnum = count_before + 1, count_after do
        slots[lnum] = vim.NIL
      end
      -- Move forwards the unchanged elements in the tail part.
      for lnum = count_before, old_last_line + 1, -1 do
        slots[lnum + shift] = slots[lnum]
      end
      -- Fill in new tables for the added lines.
      for lnum = old_last_line + 1, new_last_line do
        slots[lnum] = {}
      end
    elseif shift < 0 then
      -- Remove. Move backwards the unchanged elements in the tail part.
      for lnum = old_last_line + 1, count_before do
        slots[lnum + shift] = slots[lnum]
      end
      -- Drop the (already copied) tables from the end, in reverse order, so
      -- that no holes appear in the lines table.
      for lnum = count_before, count_after + 1, -1 do
        slots[lnum] = nil
      end
    end
    self.lines_count = count_after

    -- Replace the word lists of the lines that were actually touched.
    self:index_range(first_line, new_last_line)

    -- An edit that stays exactly within the previously edited range only
    -- invalidates the "current line" cache; anything else invalidates both.
    local same_range_as_last_edit = first_line == self.last_edit_first_line
      and old_last_line == self.last_edit_last_line
      and new_last_line == self.last_edit_last_line
    self.unique_words_curr_line_dirty = true
    if not same_range_as_last_edit then
      self.unique_words_other_lines_dirty = true
    end
    self.last_edit_first_line = first_line
    self.last_edit_last_line = new_last_line
  end

  local function handle_reload(_, _)
    if self.closed then
      return true
    end

    -- The logic for adjusting the lines list on buffer reloads is much
    -- simpler because the tables of all lines can be assumed to be fresh.
    local count_before = self.lines_count
    local count_after = vim.api.nvim_buf_line_count(self.bufnr)
    if count_after > count_before then
      -- Append.
      for lnum = count_before + 1, count_after do
        self.lines_words[lnum] = {}
      end
    elseif count_after < count_before then
      -- Remove.
      for lnum = count_before, count_after + 1, -1 do
        self.lines_words[lnum] = nil
      end
    end
    self.lines_count = count_after

    self:index_range(0, count_after)
    self:mark_all_lines_dirty()
  end

  local function handle_detach(_, _)
    if self.closed then
      return true
    end
    self:close()
  end

  vim.api.nvim_buf_attach(self.bufnr, false, {
    on_lines = handle_lines,
    on_reload = handle_reload,
    on_detach = handle_detach,
  })
end
|
|
|
|
|
2021-11-02 22:03:21 +00:00
|
|
|
---Rebuild the word list of a single line.
---
---The table already stored in `self.lines_words[linenr]` is emptied and then
---refilled in place with every non-empty match of `self.regex` in `line`
---whose byte length is at least `self.length`.
---@param linenr number 1-based index into `self.lines_words`
---@param line string text of the line
function buffer.index_line(self, linenr, line)
  local words = self.lines_words[linenr]
  for k, _ in ipairs(words) do
    words[k] = nil
  end
  local word_i = 1

  local remaining = line
  while #remaining > 0 do
    -- NOTE: Both start and end indexes here are 0-based (unlike Lua strings),
    -- and the end index is not inclusive.
    local match_start, match_end = self.regex:match_str(remaining)
    if not (match_start and match_end) then
      break
    end

    local word = remaining:sub(match_start + 1, match_end)
    -- Empty matches are never words, whatever the configured minimum is.
    if #word > 0 and #word >= self.length then
      words[word_i] = word
      word_i = word_i + 1
    end

    -- A pattern that can match the empty string reports a match ending at
    -- offset 0, which would leave `remaining` unchanged and loop forever.
    -- Always consume at least one byte so the loop is guaranteed to finish.
    remaining = remaining:sub(math.max(match_end, 1) + 1)
  end
end
|
|
|
|
|
|
|
|
---Return the unique words of the buffer, rebuilding stale caches first.
---
---NOTE: the unique-word sets are rebuilt on demand because it is common for
---the watcher callback to be fired VERY frequently, and a rebuild needs to go
---over ALL lines, not just the changed ones.
---@return table[] `{ unique_words_other_lines, unique_words_curr_line }`, each a set of `word -> true`
function buffer.get_words(self)
  -- Empties a set in place, so the table object itself is reused.
  local function wipe(set)
    for word in pairs(set) do
      set[word] = nil
    end
    return set
  end

  local edit_first, edit_last = self.last_edit_first_line, self.last_edit_last_line

  if self.unique_words_other_lines_dirty then
    -- Everything before and everything after the last edited range.
    local others = wipe(self.unique_words_other_lines)
    self:rebuild_unique_words(others, 0, edit_first)
    self:rebuild_unique_words(others, edit_last, self.lines_count)
    self.unique_words_other_lines_dirty = false
  end

  if self.unique_words_curr_line_dirty then
    -- Only the last edited range.
    local current = wipe(self.unique_words_curr_line)
    self:rebuild_unique_words(current, edit_first, edit_last)
    self.unique_words_curr_line_dirty = false
  end

  return { self.unique_words_other_lines, self.unique_words_curr_line }
end
|
|
|
|
|
2021-11-07 12:04:19 +00:00
|
|
|
---Add every word of the lines in `[range_start, range_end)` to `words_table`.
---
---Existing entries of `words_table` are kept; the caller decides whether the
---set is cleared beforehand.
---@param words_table table<string, boolean> set that receives `word -> true` entries
---@param range_start number 0-based first line (inclusive)
---@param range_end number 0-based last line (exclusive)
function buffer.rebuild_unique_words(self, words_table, range_start, range_end)
  local slots = self.lines_words
  -- `lines_words` is 1-based, hence the shift by one on the lower bound.
  for lnum = range_start + 1, range_end do
    local line_words = slots[lnum]
    for idx = 1, #line_words do
      words_table[line_words[idx]] = true
    end
  end
end
|
|
|
|
|
|
|
|
return buffer -- export the module table
|