diff --git a/README.md b/README.md index 52d87e9..26025fc 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,58 @@ end ``` +### indexing_interval (type: number) + +_Default:_ `200` + +The interval (in milliseconds) at which buffers are scanned for words when they are first opened. +Setting this interval to lower values will increase the speed of indexing, but at the expense of +higher CPU usage. By default indexing happens asynchronously, but setting this option to zero or +a negative value will switch indexing to a synchronous algorithm, which uses significantly less +RAM on big files and takes less time in total (to index the entire file), with the obvious +downside of blocking the user interface for a second or two. On small files (up to tens of +thousands of lines, probably) the difference will be unnoticeable, though. + + +### indexing_chunk_size (type: number) + +_Default:_ `1000` + +The number of lines processed in a batch every `indexing_interval` milliseconds. Setting it to +higher values will make indexing faster, but at the cost of responsiveness of the UI. When using +the synchronous mode, changing this option may improve memory usage, though the default value has +been tested to be pretty good in this regard. + +Please note that `indexing_interval` and `indexing_chunk_size` are advanced options; change +them only if you experience performance or RAM usage problems (or need to work on particularly +large files) and be sure to measure the results! + + +## Performance on large text files + +This source has been tested on code files of a few megabytes in size (5-10) and it has been +optimized for them; however, the indexed words can still take up tens of megabytes of RAM if the +file is big (on small files it _will not be more_ than a couple of megabytes, typically much +less). 
So if you wish to avoid accidentally wasting lots of RAM when editing big files, you can +tweak `get_bufnrs`, for example like this: + +```lua +get_bufnrs = function() + local buf = vim.api.nvim_get_current_buf() + local byte_size = vim.api.nvim_buf_get_offset(buf, vim.api.nvim_buf_line_count(buf)) + if byte_size > 1024 * 1024 then -- 1 Megabyte max + return {} + end + return { buf } +end +``` + +Of course, this snippet can be combined with any other recipes for `get_bufnrs`. + +As another tip, turning on the synchronous indexing mode is very likely to help with reducing +memory usage, see the `indexing_interval` option. + + ## Locality bonus comparator (distance-based sorting) This source also provides a comparator function which uses information from the word indexer diff --git a/lua/cmp_buffer/buffer.lua b/lua/cmp_buffer/buffer.lua index 4a44e32..777a89b 100644 --- a/lua/cmp_buffer/buffer.lua +++ b/lua/cmp_buffer/buffer.lua @@ -2,8 +2,6 @@ ---@field public bufnr number ---@field public opts cmp_buffer.Options ---@field public regex any ----@field public indexing_chunk_size number ----@field public indexing_interval number ---@field public timer any|nil ---@field public lines_count number ---@field public lines_words table<number, string[]> @@ -34,8 +32,6 @@ function buffer.new(bufnr, opts) self.opts = opts self.regex = vim.regex(self.opts.keyword_pattern) - self.indexing_chunk_size = 1000 - self.indexing_interval = 200 self.lines_count = 0 self.lines_words = {} @@ -96,11 +92,18 @@ end ---Indexing buffer function buffer.index(self) self.lines_count = vim.api.nvim_buf_line_count(self.bufnr) - for i = 1, self.lines_count do - self.lines_words[i] = {} - end + -- NOTE: Pre-allocating self.lines_words here somehow wastes more memory, and + -- not doing that doesn't have a visible effect on performance. Win-win. 
+ -- for i = 1, self.lines_count do + -- self.lines_words[i] = {} + -- end - self:index_range_async(0, self.lines_count) + if self.opts.indexing_interval <= 0 then + self:index_range(0, self.lines_count, self.opts.indexing_chunk_size) + self:mark_all_lines_dirty() + else + self:index_range_async(0, self.lines_count, self.opts.indexing_chunk_size) + end end --- Workaround for https://github.com/neovim/neovim/issues/16729 @@ -112,30 +115,52 @@ function buffer.safe_buf_call(self, callback) end end -function buffer.index_range(self, range_start, range_end) +--- sync algorithm +function buffer.index_range(self, range_start, range_end, chunk_size) self:safe_buf_call(function() - local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true) - for i, line in ipairs(lines) do - self:index_line(range_start + i, line) + if chunk_size < 1 then + chunk_size = range_end - range_start + end + local chunk_start = range_start + while chunk_start < range_end do + local chunk_end = math.min(chunk_start + chunk_size, range_end) + -- For some reason requesting line arrays multiple times in chunks leads + -- to much better memory usage than doing that in one big array, which is + -- why the sync algorithm has better memory usage than the async one. + local chunk_lines = vim.api.nvim_buf_get_lines(self.bufnr, chunk_start, chunk_end, true) + for i, line in ipairs(chunk_lines) do + self:index_line(chunk_start + i, line) + end + chunk_start = chunk_end end end) end -function buffer.index_range_async(self, range_start, range_end) +--- async algorithm +function buffer.index_range_async(self, range_start, range_end, chunk_size) + if chunk_size < 1 then + chunk_size = range_end - range_start + end local chunk_start = range_start local lines = vim.api.nvim_buf_get_lines(self.bufnr, range_start, range_end, true) + -- This flag prevents vim.schedule() callbacks from piling up in the queue + -- when the indexing interval is very short. 
+ local scheduled = false self.timer = vim.loop.new_timer() - self.timer:start( - 0, - self.indexing_interval, - vim.schedule_wrap(function() + self.timer:start(0, self.opts.indexing_interval, function() + if scheduled then + return + end + scheduled = true + vim.schedule(function() + scheduled = false if self.closed then return end - local chunk_end = math.min(chunk_start + self.indexing_chunk_size, range_end) + local chunk_end = math.min(chunk_start + chunk_size, range_end) self:safe_buf_call(function() for linenr = chunk_start + 1, chunk_end do self:index_line(linenr, lines[linenr]) @@ -149,7 +174,7 @@ function buffer.index_range_async(self, range_start, range_end) self:stop_indexing_timer() end end) - ) + end) end --- watch @@ -219,7 +244,7 @@ function buffer.watch(self) self.lines_count = new_lines_count -- replace lines - self:index_range(first_line, new_last_line) + self:index_range(first_line, new_last_line, self.opts.indexing_chunk_size) if first_line == self.last_edit_first_line and old_last_line == self.last_edit_last_line and new_last_line == self.last_edit_last_line then self.unique_words_curr_line_dirty = true @@ -242,9 +267,11 @@ function buffer.watch(self) -- because tables of all lines can be assumed to be fresh. local new_lines_count = vim.api.nvim_buf_line_count(self.bufnr) if new_lines_count > self.lines_count then -- append - for i = self.lines_count + 1, new_lines_count do - self.lines_words[i] = {} - end + -- Again, no need to pre-allocate, index_line will append new lines + -- itself. 
+ -- for i = self.lines_count + 1, new_lines_count do + -- self.lines_words[i] = {} + -- end elseif new_lines_count < self.lines_count then -- remove for i = self.lines_count, new_lines_count + 1, -1 do self.lines_words[i] = nil @@ -252,7 +279,7 @@ function buffer.watch(self) end self.lines_count = new_lines_count - self:index_range(0, self.lines_count) + self:index_range(0, self.lines_count, self.opts.indexing_chunk_size) self:mark_all_lines_dirty() self.words_distances_dirty = true end, diff --git a/lua/cmp_buffer/source.lua b/lua/cmp_buffer/source.lua index f9cdc30..2bb7806 100644 --- a/lua/cmp_buffer/source.lua +++ b/lua/cmp_buffer/source.lua @@ -4,6 +4,8 @@ local buffer = require('cmp_buffer.buffer') ---@field public keyword_length number ---@field public keyword_pattern string ---@field public get_bufnrs fun(): number[] +---@field public indexing_chunk_size number +---@field public indexing_interval number ---@type cmp_buffer.Options local defaults = { @@ -12,6 +14,8 @@ local defaults = { get_bufnrs = function() return { vim.api.nvim_get_current_buf() } end, + indexing_chunk_size = 1000, + indexing_interval = 200, } local source = {} @@ -29,6 +33,8 @@ source._validate_options = function(_, params) keyword_length = { opts.keyword_length, 'number' }, keyword_pattern = { opts.keyword_pattern, 'string' }, get_bufnrs = { opts.get_bufnrs, 'function' }, + indexing_chunk_size = { opts.indexing_chunk_size, 'number' }, + indexing_interval = { opts.indexing_interval, 'number' }, }) return opts end