From ff39dd23e727d7d0c31535254653a1de7ab4ef1b Mon Sep 17 00:00:00 2001 From: Kyle Meyer Date: Thu, 11 Dec 2014 00:00:16 -0500 Subject: Anchor and restrict citekey regular expression Limit the characters that are allowed in the citekey to letters, digits, underscores, and hyphens. This allows the citekey to be treated as a word (temporarily expanding word constituents to include underscore and hyphen), as opposed to explicitly specifying the characters that can come before a citekey. --- NEWS | 4 ++++ bog-tests.el | 12 ++++++++++++ bog.el | 36 +++++++++++++++++++----------------- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/NEWS b/NEWS index 7260c01..7c453f1 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,10 @@ - =bog-goto-citekey-heading-in-buffer= and =bog-goto-citekey-heading-in-notes= will now widen the buffer if the heading is outside of the narrowed region. + +- The format for citekeys is now restricted to letters, digits, + underscores, and hyphens. + - =bog-notes-directory= has been renamed to =bog-note-directory=. * v0.6.0 diff --git a/bog-tests.el b/bog-tests.el index 598dc31..0173c5f 100644 --- a/bog-tests.el +++ b/bog-tests.el @@ -121,7 +121,19 @@ point at the beginning of the inserted text." (let ((citekey "hyphen-name2010word")) (with-temp-buffer (insert citekey) + ;; At beginning (goto-char (point-min)) + (should (equal (bog-citekey-at-point) citekey)) + ;; On hyphen + (skip-chars-forward "-") + (should (equal (bog-citekey-at-point) citekey)) + ;; After hyphen + (forward-char) + (should (equal (bog-citekey-at-point) citekey)) + ;; On word + (skip-chars-forward "0-9") + ;; At year + (skip-chars-forward "-a-z") (should (equal (bog-citekey-at-point) citekey))))) ;; `bog-citekey-from-tree' diff --git a/bog.el b/bog.el index 6addb31..11bca3d 100644 --- a/bog.el +++ b/bog.el @@ -41,13 +41,16 @@ :group 'org) (defcustom bog-citekey-format - "\\([a-z]+[-a-z]*\\)\\([0-9]\\{4\\}\\)\\([a-z][a-z0-9]*\\)" + "\\b\\([a-z]+[-a-z]*\\)\\([0-9]\\{4\\}\\)\\([a-z][a-z0-9]*\\)\\b" "Regular expression used to match study citekey. By default, this matches any sequence of lower case letters (allowing hyphenation) that is followed by 4 digits and then lower case letters. +The format must be anchored by '\b' and should be restricted to +letters, digits, '-', and '_'. + This is case-sensitive (i.e., case-fold-search will be set to nil). @@ -62,15 +65,6 @@ settings: :group 'bog :type 'string) -(defcustom bog-allowed-before-citekey - "\\(\n\\|\\s-\\|(\\|\\[\\|{\\|<\\|,\\)" - "Regex that specifies characters allowed before a citekey. -This may need to be modified if you have a custom -`bog-citekey-format' or if you tend to used a certain character -before citekeys that isn't included above." - :group 'bog - :type 'string) - (defcustom bog-citekey-property "CUSTOM_ID" "Property name used to store citekey. The default corresponds to the default value of @@ -244,17 +238,25 @@ year, and the first meaningful word in the title)." (mapconcat (lambda (g) (match-string-no-properties g citekey)) groups delim)))) +(defmacro bog--with-citekey-syntax (&rest body) + "Execute BODY with hyphen and underscore as word constituents." + (declare (indent 0)) + `(with-syntax-table (copy-syntax-table (syntax-table)) + (modify-syntax-entry ?- "w") + (modify-syntax-entry ?_ "w") + ,@body)) + (defun bog-citekey-at-point () "Return citekey at point. The citekey must have the format specified by -`bog-citekey-format' and, if not at the beginning of the buffer, -be preceded by a characters in `bog-allowed-before-citekey'." +`bog-citekey-format'. Hyphens and underscores are considered as +word constituents." (save-excursion - (unless (bobp) - (re-search-backward bog-allowed-before-citekey) - (forward-char 1)) - (and (looking-at bog-citekey-format) - (match-string-no-properties 0)))) + (bog--with-citekey-syntax + (skip-syntax-backward "w") + (let (case-fold-search) + (and (looking-at bog-citekey-format) + (match-string-no-properties 0)))))) (defun bog-citekey-from-surroundings () "Get the citekey from the context of the Org file." -- cgit v1.2.3