You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

330 lines
11 KiB

;;; mono-complete-backend-word-predict.el --- Word predict back-end -*- lexical-binding: t -*-
;; SPDX-License-Identifier: GPL-3.0-or-later
;; Copyright (C) 2023 Campbell Barton
;; URL:
;; Version: 0.1
;;; Commentary:
;; Word predict back-end.
;;; Code:
(require 'mono-complete)
;; ---------------------------------------------------------------------------
;; Custom Variables
;; Customization group holding every user option of this back-end.
(defgroup mono-complete-backend-word-predict nil
  "Word prediction for mono-complete, generated on demand."
  :group 'convenience)
(defcustom mono-complete-backend-word-predict-input-paths-match-source
  ;; The globs must be wrapped in a single `list' form: `defcustom' takes
  ;; exactly one default-value expression before the docstring.
  (list
   "*.c" ; C.
   "*.cc" ; C++.
   "*.cpp" ; C++.
   "*.cxx" ; C++.
   "*.el" ; Emacs-lisp.
   "*.glsl" ; OpenGL shading language.
   "*.go" ; Go language.
   "*.h" ; C header.
   "*.hh" ; C++ header.
   "*.hxx" ; C++ header.
   "*.java" ; Java.
   "*.js" ; JavaScript.
   "*.lua" ; Lua.
   "*.m" ; Objective-C.
   "*.mm" ; Objective-C++.
   "*.py" ; Python.
   "*.rb" ; Ruby.
   "*.rs") ; Rust.
  "Source files to include when scanning directories for files to extract.
Files are parsed as code and comments are extracted.
Each entry is a UNIX style glob."
  :type '(repeat string))
(defcustom mono-complete-backend-word-predict-input-paths-match-text
  (list "*.rst" "*.md" "*.txt")
  "Globs selecting the plain-text files picked up by directory scans.
Matching files are read as plain-text and every word is extracted.
Each entry is a UNIX style glob."
  :type '(repeat string))
(defcustom mono-complete-backend-word-predict-input-paths-size-limit
  524288 ; 512 KiB.
  "Size in bytes above which files are skipped by recursive directory scans.
This can avoid slow parsing for source files which are used as data-storage.
Zero disables size limit checks."
  :type 'integer)
(defcustom mono-complete-backend-word-predict-input-paths (list "")
  "Paths used for extracting text.

- A blank string (default) will be replaced by the current buffer's project root.
- A file will be read.
- A directory will be scanned recursively for files matching
  `mono-complete-backend-word-predict-input-paths-match-source' or
  `mono-complete-backend-word-predict-input-paths-match-text'."
  :type '(repeat string))
(defcustom mono-complete-backend-word-predict-update-method 'when-missing
  "Method used for validating the model.

- \"From Manifest\" rebuilds cache that has become outdated.
- \"When Missing\" only generates data when the cache is not found.
  This can be used when users prefer to manually update cache."
  ;; The `:type' keyword is required: without it the `choice' spec is an
  ;; unpaired trailing argument to `defcustom'.
  :type
  '(choice (const :tag "From Manifest" from-manifest)
           (const :tag "When Missing" when-missing)))
(defcustom mono-complete-backend-word-predict-ngram-max
  5
  "Maximum number of words in an n-gram.
Use 2 or more; avoid values over 4 (or accept very large data-bases)."
  :type 'integer)
;; Value is derived from the directory this file is loaded from, falling back
;; to the visited file's directory when the form is evaluated from a buffer
;; (`load-file-name' is nil in that case).
;; NOTE(review): this form is missing its closing parenthesis.  Judging by the
;; constant's name it presumably also joined the directory with the file name
;; of an external script -- TODO confirm against the upstream file.
(defconst mono-complete-backend-word-predict--command
(file-name-directory (or load-file-name buffer-file-name))
;; Commented-out alternative with a hard-coded location
;; (presumably kept for local debugging).
;; (defconst
;; mono-complete-backend-word-predict--command
;; "/src/emacs/mono-complete/")
;; ---------------------------------------------------------------------------
;; Internal Utilities
(defun mono-complete-backend-word-predict--range-contains (beg end re-contains)
  "Return non-nil when every character between BEG and END matches RE-CONTAINS.

RE-CONTAINS is a character set in the syntax accepted by
`skip-chars-forward' (for example \"[:alpha:]'-\"), not a regular expression.
An empty range always matches.  Point is left where it was."
  ;; Callers depend on point not moving, so the scan is wrapped in
  ;; `save-excursion'.
  (save-excursion
    (goto-char beg)
    ;; `skip-chars-forward' returns the distance moved: the range matches
    ;; entirely when that distance equals the size of the range.
    (= (- end beg) (skip-chars-forward re-contains end))))
(defun mono-complete-backend-word-predict--prefix-and-words (is-partial)
  "Return a cons cell (PREFIX . WORDS) for the text before point.

WORDS is the list of words found by scanning backwards from point, in
buffer order.  PREFIX is the buffer text from the start of the first of
those words up to point.  Scanning stops at sentence-ending punctuation,
at a word containing anything other than alphanumerics, apostrophe or
hyphen, or once enough words for the n-gram have been collected.

When IS-PARTIAL is non-nil, an extra word is required.
Return (nil) when too few words were found.  Point is left where it was."
  (let* ((words-extra
          ;; Allow an extra word for partial completion.
          (cond
           (is-partial
            1)
           (t
            0)))
         (words-limit-max (+ mono-complete-backend-word-predict-ngram-max words-extra))
         ;; NOTE(review): the base of 2 mirrors the "at least two words" check
         ;; in `mono-complete-backend-word-predict-complete' -- confirm.
         (words-limit-min (+ 2 words-extra))
         (pos-init (point))
         (pos-step-prev nil)
         (pos-beg nil)
         (words (list))
         (search t))
    (save-excursion
      ;; Stop when told to, when enough words were collected,
      ;; or when the last step failed to move point (beginning of buffer).
      (while (and search (< (length words) words-limit-max) (not (eq pos-step-prev (point))))
        (setq pos-step-prev (point))
        (skip-chars-backward "\n[:blank:][:punct:]")
        (cond
         ;; Early exit on full-stop, ! ... etc.
         ;; (the text just skipped over contains a sentence terminator).
         ((not
           (mono-complete-backend-word-predict--range-contains
            (point) pos-step-prev "^.!?:;)\\]}"))
          (setq search nil))
         (t
          (let ((pos-word-end (point)))
            (skip-chars-backward "^\n[:blank:]")
            (let* ((pos-word-beg (point))
                   (word (buffer-substring-no-properties pos-word-beg pos-word-end)))
              (cond
               ;; Ensure the word is ONLY alphanumeric, apostrophe or hyphen.
               ((and (< pos-word-beg pos-word-end)
                     (string-empty-p (string-trim-left word "[[:alnum:]'-]*")))
                (push word words)
                (setq pos-beg pos-word-beg))
               (t
                (setq search nil)))))))))
    (mono-complete--debug-log "complete words: %S" words)
    (cond
     ((<= words-limit-min (length words))
      (cons (buffer-substring-no-properties pos-beg pos-init) words))
     (t
      (cons nil nil)))))
(defun mono-complete-backend-word-predict--command-to-string (command &rest args)
  "Execute COMMAND with ARGS and return its output as a string.

COMMAND is run directly through `call-process', no shell is involved,
so ARGS are passed to the program verbatim and need no quoting."
  ;; Handy to run outside of emacs for debugging the script itself.
  ;; (printf "command: %S\n" args)
  (with-temp-buffer
    (let ((all-args
           (append
            (list
             ;; Command.
             command
             ;; In-file.
             nil
             ;; Destination (stdout), the temporary buffer.
             t
             ;; Display.
             nil)
            args))
          ;; Ensure that `default-directory' exists and is readable.
          ;; Even though the default directory isn't used, `call-process'
          ;; will fail with an error when called with a buffer open that
          ;; references a directory that doesn't exist.
          ;; Assume the users home directory is valid and use this instead.
          (default-directory (expand-file-name "~")))
      (apply #'call-process all-args)
      (buffer-string))))
(defun mono-complete-backend-word-predict--input-paths-scan ()
"Return root directories."
;; NOTE(review): this definition is incomplete as shown (unbalanced
;; parentheses).  The form applying the lambda to a list of paths, the `cond'
;; head, the value used for a blank path, the call that reports the message
;; below and the lambda's return value are not visible -- TODO restore them
;; from the upstream file.
;; The one caller in this file joins the result with `path-separator'.
;; Per-path handler.
(lambda (path)
(let ((path-expanded
;; A blank entry is special-cased; the option's docstring says it stands for
;; the current buffer's project root (the replacement value is not visible here).
((string-empty-p path)
;; Any other entry is expanded to an absolute file name.
(expand-file-name path)))))
(when path-expanded
;; Entries that do not exist on disk are discarded (reset to nil).
(unless (file-exists-p path-expanded)
"mono-complete-backend-word-predict-root-directories: skipping missing directory %S"
(setq path-expanded nil)))
(defun mono-complete-backend-word-predict--run-ext-util (text partial-word update-method)
"Run the external word prediction utility on TEXT.
PARTIAL-WORD may be an empty string, otherwise part of the word to complete.
UPDATE-METHOD the method used to check whether the model needs to be updated."
;; NOTE(review): this definition is incomplete as shown.  The enclosing call
;; (presumably to `mono-complete-backend-word-predict--command-to-string'),
;; the option name preceding each value, the TEXT, PARTIAL-WORD and
;; UPDATE-METHOD arguments and the separator argument of the second
;; `mapconcat' are not visible -- TODO restore them from the upstream file.
;; Callers in this file treat the result as a string.
;; Cache directory.
(file-name-concat (expand-file-name mono-complete-cache-directory) "word-predict")
;; Text to complete (or nothing to generate).
;; Complete using the word behind the cursor as a partial
;; Multiple roots.
;; (joined into one argument using `path-separator').
(mapconcat #'identity (mono-complete-backend-word-predict--input-paths-scan) path-separator)
;; Input files size limit.
(number-to-string mono-complete-backend-word-predict-input-paths-size-limit)
;; Include source extensions.
(mapconcat #'identity mono-complete-backend-word-predict-input-paths-match-source
;; Include text extensions.
(mapconcat #'identity mono-complete-backend-word-predict-input-paths-match-text path-separator)
;; Update method.
;; ---------------------------------------------------------------------------
;; Callbacks
(defun mono-complete-backend-word-predict-setup (config)
  "Setup on enabled mode (for this buffer).

Run the external utility without any text so it can generate its models,
following `mono-complete-backend-word-predict-update-method'.
Any output of the utility is shown with `message'.
Return CONFIG unchanged.
TODO: support CONFIG."
  ;; Blank "text" is a signal to generate all models.
  (let ((text
         (mono-complete-backend-word-predict--run-ext-util
          "" "" (symbol-name mono-complete-backend-word-predict-update-method))))
    (unless (string-empty-p text)
      (message "%s" text)))
  ;; NOTE(review): handing CONFIG back is assumed to be what mono-complete
  ;; expects from a `:setup' callback -- confirm against mono-complete.
  config)
(defun mono-complete-backend-word-predict-prefix ()
  "Return the prefix at point, or nil when there is nothing to complete.

The words preceding point are attached to the first character of the
returned string as the `mono-complete-backend-word-predict' text property,
where `mono-complete-backend-word-predict-complete' reads them back."
  (let ((is-partial nil))
    ;; Each clause logs what is being searched for and yields
    ;; non-nil when a search should be performed.
    (when (cond
           ;; After space, search for whole word.
           ((eq ?\s (preceding-char))
            (mono-complete--debug-log "look for new word")
            t)
           ;; Typing in word, check if this word is a part of a longer word.
           ((mono-complete-backend-word-predict--range-contains (1- (point)) (point) "[:alpha:]'-")
            (mono-complete--debug-log "look for partial word, or new word (fallback)")
            (setq is-partial t)
            t)
           ;; When directly after non-ascii.
           (t
            (mono-complete--debug-log
             "look for word exiting, word chars or white-space not found before character")
            nil))
      (pcase-let ((`(,prefix . ,words)
                   (mono-complete-backend-word-predict--prefix-and-words is-partial)))
        (when prefix
          ;; `add-text-properties' returns non-nil when it changed the string.
          (when (add-text-properties 0 1 (list 'mono-complete-backend-word-predict words) prefix)
            prefix))))))
(defun mono-complete-backend-word-predict-complete (_config prefix cache)
  "Complete at point based on PREFIX & CACHE.

PREFIX is the string produced by `mono-complete-backend-word-predict-prefix',
carrying the preceding words in its `mono-complete-backend-word-predict'
text property.
Return a cons cell (RESULT . CACHE) where RESULT is a list holding the
predicted text, or nil when there is no prediction.  CACHE is passed through."
  ;; Return a list of strings or nil.
  (let ((result nil)
        (partial-word "")
        (words (get-text-property 0 'mono-complete-backend-word-predict prefix)))
    (unless (eq ?\s (preceding-char))
      ;; The last word is the one being typed, split it off.
      ;; Use `last' & `butlast' (non-destructive) so the list stored in the
      ;; text property of PREFIX is not modified: calling this function again
      ;; with the same PREFIX must see the same words.
      (setq partial-word (or (car (last words)) ""))
      (setq words (butlast words)))
    ;; At least two words is always needed.
    (when (>= (length words) 2)
      (let ((text
             (mono-complete-backend-word-predict--run-ext-util
              (mapconcat #'identity words " ") partial-word
              ;; Updating should be handled when the mode is enabled.
              ;; NOTE(review): "when-missing" is assumed here as the method
              ;; that never rebuilds an existing cache -- confirm.
              "when-missing")))
        (unless (string-empty-p text)
          (setq result (list text)))))
    (cons result cache)))
;; ---------------------------------------------------------------------------
;; Public Callback
(defun mono-complete-backend-word-predict ()
  "Word prediction completer.
Return the back-end as a property list with the
`:setup', `:prefix' and `:complete' callbacks."
  (list
   :setup #'mono-complete-backend-word-predict-setup
   :prefix #'mono-complete-backend-word-predict-prefix
   :complete #'mono-complete-backend-word-predict-complete))
(provide 'mono-complete-backend-word-predict)
;; Local Variables:
;; fill-column: 99
;; indent-tabs-mode: nil
;; elisp-autofmt-format-quoted: nil
;; End:
;;; mono-complete-backend-word-predict.el ends here