From ce0537985efa6ccb8de190db09ee55169f6b191c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Kettunen?= Date: Fri, 27 Dec 2024 03:17:39 +0200 Subject: [PATCH 1/3] elfeed-make-tagger: Add more possible matches Also fix some minor errors reported by checkdoc. --- elfeed.el | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/elfeed.el b/elfeed.el index 682d0da..85ecb84 100644 --- a/elfeed.el +++ b/elfeed.el @@ -658,16 +658,18 @@ called interactively, SAVE is set to t." ;; New entry filtering (cl-defun elfeed-make-tagger - (&key feed-title feed-url entry-title entry-link after before + (&key feed-title feed-url feed-author entry-title entry-link + entry-enclosure entry-content-type after before add remove callback) - "Create a function that adds or removes tags on matching entries. + "Create a function that adds, removes tags or does CALLBACK on matching entries. -FEED-TITLE, FEED-URL, ENTRY-TITLE, and ENTRY-LINK are regular -expressions or a list (not ), which indicates a negative -match. AFTER and BEFORE are relative times (see -`elfeed-time-duration'). Entries must match all provided +FEED-TITLE, FEED-URL, FEED-AUTHOR, ENTRY-TITLE, ENTRY-LINK, +ENTRY-ENCLOSURE and ENTRY-CONTENT-TYPE +are regular expressions or a list \(not \), +which indicates a negative match. AFTER and BEFORE are relative times +\(see `elfeed-time-duration'\). Entries must match all provided expressions. If an entry matches, add tags ADD and remove tags -REMOVE. Call CALLBACK for each entry. +REMOVE. Examples, @@ -698,8 +700,11 @@ The returned function should be added to `elfeed-new-entry-hook'." (when (and (match feed-title (elfeed-feed-title feed)) (match feed-url (elfeed-feed-url feed)) + (match feed-author (elfeed-feed-author feed)) (match entry-title (elfeed-entry-title entry)) (match entry-link (elfeed-entry-link entry)) + (match entry-content-type (elfeed-entry-content-type entry)) + (match entry-enclosure (elfeed-entry-enclosures entry)) (or (not after-time) (> date (- (float-time) after-time))) (or (not before-time) (< date (- (float-time) before-time)))) (when add From 0141b525cdb77537ab82a0c5f2849ae5ee709160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Kettunen?= Date: Fri, 27 Dec 2024 04:02:40 +0200 Subject: [PATCH 2/3] elfeed-make-tagger: Make it possible to match against values from meta --- elfeed.el | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/elfeed.el b/elfeed.el index 85ecb84..26f0366 100644 --- a/elfeed.el +++ b/elfeed.el @@ -660,13 +660,16 @@ called interactively, SAVE is set to t." (cl-defun elfeed-make-tagger (&key feed-title feed-url feed-author entry-title entry-link entry-enclosure entry-content-type after before - add remove callback) + feed-meta entry-meta add remove callback) "Create a function that adds, removes tags or does CALLBACK on matching entries. FEED-TITLE, FEED-URL, FEED-AUTHOR, ENTRY-TITLE, ENTRY-LINK, -ENTRY-ENCLOSURE and ENTRY-CONTENT-TYPE +ENTRY-ENCLOSURE, ENTRY-CONTENT-TYPE are regular expressions or a list \(not \), -which indicates a negative match. AFTER and BEFORE are relative times +which indicates a negative match. FEED-META and ENTRY-META are +a list of key and value where car is the key and cadr is value. +The key and value are matched against the respective meta's. +AFTER and BEFORE are relative times \(see `elfeed-time-duration'\). Entries must match all provided expressions. If an entry matches, add tags ADD and remove tags REMOVE. @@ -701,10 +704,12 @@ The returned function should be added to `elfeed-new-entry-hook'." (match feed-title (elfeed-feed-title feed)) (match feed-url (elfeed-feed-url feed)) (match feed-author (elfeed-feed-author feed)) + (match (car feed-meta) (elfeed-meta feed (cadr feed-meta))) (match entry-title (elfeed-entry-title entry)) (match entry-link (elfeed-entry-link entry)) (match entry-content-type (elfeed-entry-content-type entry)) (match entry-enclosure (elfeed-entry-enclosures entry)) + (match (car entry-meta) (elfeed-meta entry (cadr entry-meta))) (or (not after-time) (> date (- (float-time) after-time))) (or (not before-time) (< date (- (float-time) before-time)))) (when add From a93f5fa5791f7af31da89348e9b0361747058e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Kettunen?= Date: Sun, 22 Dec 2024 04:19:30 +0200 Subject: [PATCH 3/3] Refactor entry rendering after eww and optionally to retrieve content Refactor entry rendering after eww. Now there's a separate render function that can specifically handle each content type. The render function is used as callback for eww if the retrieve of content through the entry link is enabled. If this is not used the render function is called directly with the content string. The retrieval of content option allows for several conditions to enable it such as always, regular expression match against the article link or length of content. --- elfeed-show.el | 337 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 301 insertions(+), 36 deletions(-) diff --git a/elfeed-show.el b/elfeed-show.el index ab1beb5..ab2186a 100644 --- a/elfeed-show.el +++ b/elfeed-show.el @@ -12,6 +12,7 @@ (eval-when-compile (require 'subr-x)) (require 'shr) +(require 'eww) (require 'elfeed) (require 'elfeed-search) @@ -72,6 +73,60 @@ Called without arguments." :group 'elfeed :type '(choice function)) +(defcustom elfeed-show-entry-retrieve-content-link 180 + "Possible matches for entries which contents should be using eww retrieved. +Entries have to contain link property. +Match can be either the maximum length of content in characters, +a matching regular expression, a boolean or a list of possible matches." + :group 'elfeed + :type '(choice + regexp + integer + boolean + (repeat (choice regexp + integer)))) + +(defcustom elfeed-show-entry-retrieve-content-link-readability eww-readable-urls + "Elfeed's variant of URL's where EWW's readability functions are applied. +Depends on `eww-readable'. Defaults to `eww-readable-urls' if available." + :group 'elfeed + :link '(variable-link eww-readable-urls) + :require #'eww-readable + :type '(repeat (choice (string :tag "Readable URL") + (cons :tag "URL and Readability" + (string :tag "URL") + (radio (const :tag "Readable" t) + (const :tag "Non-readable" nil)))))) + +(defvar-local elfeed-show-entry-readable nil + "If true entry was made readable.") + +(unless (fboundp #'eww-default-readable-p) + (defun elfeed-default-readable-p (url) + "Return non-nil if URL should be displayed in readable mode by default. +This consults the entries in +`elfeed-show-entry-retrieve-content-link-readability' (which see)." + (catch 'found + (let (result) + (dolist (regexp eww-readable-urls) + (if (consp regexp) + (setq result (cdr regexp) + regexp (car regexp)) + (setq result t)) + (when (string-match regexp url) + (throw 'found result)))))) + (defalias 'eww-default-readable-p 'elfeed-default-readable-p) + (defalias 'eww-readable-urls nil)) + +;; Emacs < 31 compatibility +(with-no-warnings + (if (fboundp 'eww-readable-dom) + (defalias 'elfeed-readable-dom #'eww-readable-dom) + (defun elfeed-readable-dom (dom) + "Return a readable version of DOM." + (eww-score-readability dom) + (eww-highest-readability dom)))) + (defvar elfeed-show-refresh-function #'elfeed-show-refresh--mail-style "Function called to refresh the `*elfeed-entry*' buffer.") @@ -79,6 +134,7 @@ Called without arguments." :doc "Keymap for `elfeed-show-mode'." :parent special-mode-map "d" #'elfeed-show-save-enclosure + "R" #'elfeed-show-refresh-togge-readable "n" #'elfeed-show-next "p" #'elfeed-show-prev "s" #'elfeed-show-new-live-search @@ -137,39 +193,143 @@ Called without arguments." (interactive nil elfeed-show-mode) (elfeed-show-tag 'unread)) -(defvar-local elfeed--insert-html-tick 0 +(defun elfeeed--html-if-doctype (_headers response-buffer) + "Return \"text/html\" if RESPONSE-BUFFER has an HTML doctype declaration. +HEADERS is unused." + ;; https://html.spec.whatwg.org/multipage/syntax.html#the-doctype + (with-current-buffer response-buffer + (let ((case-fold-search t)) + (save-excursion + (goto-char (point-min)) + ;; Match basic "" and also legacy variants as + ;; specified in link above -- being purposely lax about it. + (when (search-forward " to work around libxml-parse-html-region bug - (when base-url - (insert (format "" base-url))) - (insert html) - (libxml-parse-html-region (point-min) (point-max) base-url)) - '(i () "Elfeed: libxml2 functionality is unavailable"))) - (tick (incf elfeed--insert-html-tick)) - (orig (symbol-function 'url-queue-retrieve)) - ((symbol-function 'url-queue-retrieve) - (lambda (url cb &rest args) - (let ((cb (if (eq cb #'shr-image-fetched) - (lambda (status buffer &rest args) - (when (and (buffer-live-p buffer) - (= tick +(put 'elfeed--render-html-tick 'permanent-local t) + +(defun elfeed-render (status url &optional point buffer encode explicit-content-type) + (cl-letf* ((headers (eww-parse-headers)) + (content-type (or + explicit-content-type + (mail-header-parse-content-type + (if (zerop (length (cdr (assoc "content-type" headers)))) + (elfeed--guess-cotent-type-headers headers (current-buffer)) + (cdr (assoc "content-type" headers)))))) + (charset (intern + (downcase + (or (cdr (assq 'charset (cdr content-type))) + (eww-detect-charset (eww-html-p (car content-type))) + "utf-8")))) + (data-buffer (current-buffer)) + (shr-target-id (url-target (url-generic-parse-url url))) + (tick (incf elfeed--render-html-tick)) + (orig (symbol-function 'url-queue-retrieve)) + ((symbol-function 'url-queue-retrieve) + (lambda (url cb &rest args) + (let ((cb (if (eq cb #'shr-image-fetched) + (lambda (status buffer &rest args) + (when (and (buffer-live-p buffer) + (= tick (buffer-local-value - 'elfeed--insert-html-tick buffer))) - (apply #'shr-image-fetched status buffer args))) - cb))) - (apply orig url cb args))))) - (shr-insert-document doc))) + 'elfeed--render-html-tick buffer))) + (apply #'shr-image-fetched status buffer args))) + cb))) + (apply orig url cb args)))) + (last-coding-system-used)) + ;; Reset point after `eww-parse-headers' when CONTENT-TYPE was explicit + ;; we assume explicit content type means a document without headers + (when explicit-content-type + (goto-char (point-min))) + (let ((redirect (plist-get status :redirect))) + (when redirect + (setq url redirect))) + (when (buffer-live-p buffer) + (with-current-buffer buffer + ;; Make buffer listings more informative. + (setq list-buffers-directory url) + ;; Let the URL library have a handle to the current URL for + ;; referer purposes. + (setq url-current-lastloc (url-generic-parse-url url))) + (unwind-protect + (progn + (cond + ((and eww-use-external-browser-for-content-type + (string-match-p eww-use-external-browser-for-content-type + (car content-type))) + (erase-buffer) + (insert "Unsupported content type") + (insert (format "

Content-type %s is unsupported

" + (car content-type))) + (insert (format "Direct link to the document" + url)) + (goto-char (point-min)) + (elfeed-display-html (or encode charset) + url nil point buffer)) + ((equal (car content-type) "application/pdf") + (eww-display-pdf)) + ((string-match-p "\\`image/" (car content-type)) + (eww-display-image buffer)) + ((eww-html-p (car content-type)) + (elfeed-display-html (or encode charset) + url nil point buffer)) + (t + (elfeed-display-raw buffer (or encode charset 'utf-8)))) + (with-current-buffer buffer + (plist-put eww-data :url url) + (and last-coding-system-used + (set-buffer-file-coding-system last-coding-system-used)) + (unless shr-fill-text + (visual-line-mode) + (visual-wrap-prefix-mode)) + (run-hooks 'eww-after-render-hook) + ;; Enable undo again so that undo works in text input + ;; boxes. + (setq buffer-undo-list nil))) + (kill-buffer data-buffer))) + (unless (buffer-live-p buffer) + (kill-buffer data-buffer)))) + +(defun elfeed-display-html (charset url &optional document point buffer) + (let ((source (buffer-substring (point) (point-max)))) + (with-current-buffer buffer + (plist-put document :source source))) + (unless document + (let ((dom (eww--parse-html-region (point) (point-max) charset)) + (eww-readable-urls elfeed-show-entry-retrieve-content-link-readability)) + (when (with-current-buffer buffer + (or (and elfeed-show-entry-readable + (not (eww-default-readable-p url))) + (and (eww-default-readable-p url) + (not elfeed-show-entry-readable)))) + (setq dom (elfeed-readable-dom dom)) + (setq elfeed-show-entry-readable t)) + (setq document (eww-document-base url dom)))) + (with-current-buffer buffer + (let ((inhibit-read-only t) + (inhibit-modification-hooks t)) + (elfeed-display-document document point buffer)))) + +(defun elfeed-display-raw (buffer &optional encode) + (let ((data (buffer-substring (point) (point-max)))) + (unless (buffer-live-p buffer) + (error "Buffer %s doesn't exist" buffer)) + (with-current-buffer buffer + (let ((inhibit-read-only t)) + (insert data) + (condition-case nil + (decode-coding-region (point-min) (1+ (length data)) encode) + (coding-system-error nil))) + (goto-char (point-min))))) (cl-defun elfeed-insert-link (url &optional (content url)) "Insert a clickable hyperlink to URL titled CONTENT." @@ -244,12 +404,99 @@ Links are relative to BASE-URL if non-nil." do (elfeed-insert-link (car enclosure)) do (insert "\n")) (insert "\n") - (if content - (if (eq type 'html) - (elfeed-insert-html content base) - (insert content)) - (insert (propertize "(empty)\n" 'face 'italic))) - (goto-char (point-min)))) + (cond ((and elfeed-show-entry-retrieve-content-link + (or + ;; regexp + (and (stringp elfeed-show-entry-retrieve-content-link) + (string-match elfeed-show-entry-retrieve-content-link + link)) + ;; number of characters + (and (integerp elfeed-show-entry-retrieve-content-link) + (or (not content) + (<= (length content) elfeed-show-entry-retrieve-content-link))) + ;; list of either above + (and (listp elfeed-show-entry-retrieve-content-link) + (let ((retrieve-conditions elfeed-show-entry-retrieve-content-link) + condition + (result t)) + ;; Loop through until no conditions are left or found a result + (while (and result + (setq condition (pop retrieve-conditions))) + (when (or (and + ;; regexp + (stringp condition) + (string-match condition + link)) + ;; number of characters + (and (integerp condition) + (or (not content) + (<= (length content) + condition)))) + ;; Make while condition fail a result has been found + (setq result nil))) + ;; Invert result + (not result))) + ;; a true boolean (by this point it can only be true) + (booleanp elfeed-show-entry-retrieve-content-link) + nil)) + (eww-retrieve link #'elfeed-render (list link nil (current-buffer)))) + (content + ;; FIXME: Elfeed doesn't really differentiate between content and description. + ;; Some entries might claim to have content but they don't. + ;; Calling `eww-retrieve' on those entries without content should be enough but yeah.. + (let ((content-buffer (current-buffer)) + (elfeed-show-entry-retrieve-content-link-readability nil) + (content-type `(,(cond ((eq type 'html) + "text/html") + (t "text/plain")) + (char-set . nil)))) + (with-temp-buffer + (insert content) + (elfeed-render nil link nil content-buffer nil content-type)))) + (t + (insert (propertize "(empty)\n" 'face 'italic)) + (goto-char (point-min)))))) + +(defun elfeed-display-document (document &optional point buffer) + (unless (fboundp 'libxml-parse-html-region) + (error "This function requires Emacs to be compiled with libxml2")) + (setq buffer (or buffer (current-buffer))) + (unless (buffer-live-p buffer) + (error "Buffer %s doesn't exist" buffer)) + ;; There should be a better way to abort loading images + ;; asynchronously. + (setq url-queue nil) + (let ((url (when (eq (car document) 'base) + (alist-get 'href (cadr document))))) + (unless url + (error "Document is missing base URL")) + (with-current-buffer buffer + (setq bidi-paragraph-direction nil) + (let ((inhibit-read-only t) + (inhibit-modification-hooks t) + ;; Possibly set by the caller, e.g., `eww-render' which + ;; preserves the old URL #target before chasing redirects. + (shr-target-id (or shr-target-id + (url-target (url-generic-parse-url url))))) + (with-delayed-message (2 "Rendering HTML...") + (shr-insert-document document)) + (cond + (point + (goto-char point)) + (shr-target-id + (goto-char (point-min)) + (let ((match (text-property-search-forward + 'shr-target-id shr-target-id #'member))) + (when match + (goto-char (prop-match-beginning match))))) + (t + (goto-char (point-min)) + ;; Don't leave point inside forms, because the normal eww + ;; commands aren't available there. + (while (and (not (eobp)) + (get-text-property (point) 'eww-form)) + (forward-line 1))))) + (eww-size-text-inputs)))) (defun elfeed-show-refresh (&rest _) "Update the buffer to match the selected entry. @@ -258,6 +505,24 @@ Used as `revert-buffer-function'." (interactive) (funcall elfeed-show-refresh-function)) + +(defun elfeed-show-refresh-togge-readable () + "Toggle display of only the main \"readable\" parts of the current web page. +This command uses heuristics to find the parts of the web page that +contain the main textual portion, leaving out navigation menus and the +like. + +If called interactively, toggle the display of the readable parts. If +the prefix argument is positive, display the readable parts, and if it +is zero or negative, display the full page. + +If called from Lisp, toggle the display of the readable parts if ARG is +`toggle'. Display the readable parts if ARG is nil, omitted, or is a +positive number. Display the full page if ARG is a negative number." + (interactive "" elfeed-show-mode) + (setq elfeed-show-entry-readable (not elfeed-show-entry-readable)) + (call-interactively elfeed-show-refresh-function)) + (defcustom elfeed-show-unique-buffers nil "When non-nil, every entry buffer gets a unique name. This allows for displaying multiple show buffers at the same