diff --git a/elfeed-show.el b/elfeed-show.el index ab1beb5..ab2186a 100644 --- a/elfeed-show.el +++ b/elfeed-show.el @@ -12,6 +12,7 @@ (eval-when-compile (require 'subr-x)) (require 'shr) +(require 'eww) (require 'elfeed) (require 'elfeed-search) @@ -72,6 +73,60 @@ Called without arguments." :group 'elfeed :type '(choice function)) +(defcustom elfeed-show-entry-retrieve-content-link 180 + "Possible matches for entries which contents should be using eww retrieved. +Entries have to contain link property. +Match can be either the maximum length of content in characters, +a matching regular expression, a boolean or a list of possible matches." + :group 'elfeed + :type '(choice + regexp + integer + boolean + (repeat (choice regexp + integer)))) + +(defcustom elfeed-show-entry-retrieve-content-link-readability eww-readable-urls + "Elfeed's variant of URL's where EWW's readability functions are applied. +Depends on `eww-readable'. Defaults to `eww-readable-urls' if available." + :group 'elfeed + :link '(variable-link eww-readable-urls) + :require #'eww-readable + :type '(repeat (choice (string :tag "Readable URL") + (cons :tag "URL and Readability" + (string :tag "URL") + (radio (const :tag "Readable" t) + (const :tag "Non-readable" nil)))))) + +(defvar-local elfeed-show-entry-readable nil + "If true entry was made readable.") + +(unless (fboundp #'eww-default-readable-p) + (defun elfeed-default-readable-p (url) + "Return non-nil if URL should be displayed in readable mode by default. +This consults the entries in +`elfeed-show-entry-retrieve-content-link-readability' (which see)." + (catch 'found + (let (result) + (dolist (regexp eww-readable-urls) + (if (consp regexp) + (setq result (cdr regexp) + regexp (car regexp)) + (setq result t)) + (when (string-match regexp url) + (throw 'found result)))))) + (defalias 'eww-default-readable-p 'elfeed-default-readable-p) + (defalias 'eww-readable-urls nil)) + +;; Emacs < 31 compatibility +(with-no-warnings + (if (fboundp 'eww-readable-dom) + (defalias 'elfeed-readable-dom #'eww-readable-dom) + (defun elfeed-readable-dom (dom) + "Return a readable version of DOM." + (eww-score-readability dom) + (eww-highest-readability dom)))) + (defvar elfeed-show-refresh-function #'elfeed-show-refresh--mail-style "Function called to refresh the `*elfeed-entry*' buffer.") @@ -79,6 +134,7 @@ Called without arguments." :doc "Keymap for `elfeed-show-mode'." :parent special-mode-map "d" #'elfeed-show-save-enclosure + "R" #'elfeed-show-refresh-togge-readable "n" #'elfeed-show-next "p" #'elfeed-show-prev "s" #'elfeed-show-new-live-search @@ -137,39 +193,143 @@ Called without arguments." (interactive nil elfeed-show-mode) (elfeed-show-tag 'unread)) -(defvar-local elfeed--insert-html-tick 0 +(defun elfeeed--html-if-doctype (_headers response-buffer) + "Return \"text/html\" if RESPONSE-BUFFER has an HTML doctype declaration. +HEADERS is unused." + ;; https://html.spec.whatwg.org/multipage/syntax.html#the-doctype + (with-current-buffer response-buffer + (let ((case-fold-search t)) + (save-excursion + (goto-char (point-min)) + ;; Match basic "" and also legacy variants as + ;; specified in link above -- being purposely lax about it. + (when (search-forward " to work around libxml-parse-html-region bug - (when base-url - (insert (format "" base-url))) - (insert html) - (libxml-parse-html-region (point-min) (point-max) base-url)) - '(i () "Elfeed: libxml2 functionality is unavailable"))) - (tick (incf elfeed--insert-html-tick)) - (orig (symbol-function 'url-queue-retrieve)) - ((symbol-function 'url-queue-retrieve) - (lambda (url cb &rest args) - (let ((cb (if (eq cb #'shr-image-fetched) - (lambda (status buffer &rest args) - (when (and (buffer-live-p buffer) - (= tick +(put 'elfeed--render-html-tick 'permanent-local t) + +(defun elfeed-render (status url &optional point buffer encode explicit-content-type) + (cl-letf* ((headers (eww-parse-headers)) + (content-type (or + explicit-content-type + (mail-header-parse-content-type + (if (zerop (length (cdr (assoc "content-type" headers)))) + (elfeed--guess-cotent-type-headers headers (current-buffer)) + (cdr (assoc "content-type" headers)))))) + (charset (intern + (downcase + (or (cdr (assq 'charset (cdr content-type))) + (eww-detect-charset (eww-html-p (car content-type))) + "utf-8")))) + (data-buffer (current-buffer)) + (shr-target-id (url-target (url-generic-parse-url url))) + (tick (incf elfeed--render-html-tick)) + (orig (symbol-function 'url-queue-retrieve)) + ((symbol-function 'url-queue-retrieve) + (lambda (url cb &rest args) + (let ((cb (if (eq cb #'shr-image-fetched) + (lambda (status buffer &rest args) + (when (and (buffer-live-p buffer) + (= tick (buffer-local-value - 'elfeed--insert-html-tick buffer))) - (apply #'shr-image-fetched status buffer args))) - cb))) - (apply orig url cb args))))) - (shr-insert-document doc))) + 'elfeed--render-html-tick buffer))) + (apply #'shr-image-fetched status buffer args))) + cb))) + (apply orig url cb args)))) + (last-coding-system-used)) + ;; Reset point after `eww-parse-headers' when CONTENT-TYPE was explicit + ;; we assume explicit content type means a document without headers + (when explicit-content-type + (goto-char (point-min))) + (let ((redirect (plist-get status :redirect))) + (when redirect + (setq url redirect))) + (when (buffer-live-p buffer) + (with-current-buffer buffer + ;; Make buffer listings more informative. + (setq list-buffers-directory url) + ;; Let the URL library have a handle to the current URL for + ;; referer purposes. + (setq url-current-lastloc (url-generic-parse-url url))) + (unwind-protect + (progn + (cond + ((and eww-use-external-browser-for-content-type + (string-match-p eww-use-external-browser-for-content-type + (car content-type))) + (erase-buffer) + (insert "Unsupported content type") + (insert (format "

Content-type %s is unsupported

" + (car content-type))) + (insert (format "Direct link to the document" + url)) + (goto-char (point-min)) + (elfeed-display-html (or encode charset) + url nil point buffer)) + ((equal (car content-type) "application/pdf") + (eww-display-pdf)) + ((string-match-p "\\`image/" (car content-type)) + (eww-display-image buffer)) + ((eww-html-p (car content-type)) + (elfeed-display-html (or encode charset) + url nil point buffer)) + (t + (elfeed-display-raw buffer (or encode charset 'utf-8)))) + (with-current-buffer buffer + (plist-put eww-data :url url) + (and last-coding-system-used + (set-buffer-file-coding-system last-coding-system-used)) + (unless shr-fill-text + (visual-line-mode) + (visual-wrap-prefix-mode)) + (run-hooks 'eww-after-render-hook) + ;; Enable undo again so that undo works in text input + ;; boxes. + (setq buffer-undo-list nil))) + (kill-buffer data-buffer))) + (unless (buffer-live-p buffer) + (kill-buffer data-buffer)))) + +(defun elfeed-display-html (charset url &optional document point buffer) + (let ((source (buffer-substring (point) (point-max)))) + (with-current-buffer buffer + (plist-put document :source source))) + (unless document + (let ((dom (eww--parse-html-region (point) (point-max) charset)) + (eww-readable-urls elfeed-show-entry-retrieve-content-link-readability)) + (when (with-current-buffer buffer + (or (and elfeed-show-entry-readable + (not (eww-default-readable-p url))) + (and (eww-default-readable-p url) + (not elfeed-show-entry-readable)))) + (setq dom (elfeed-readable-dom dom)) + (setq elfeed-show-entry-readable t)) + (setq document (eww-document-base url dom)))) + (with-current-buffer buffer + (let ((inhibit-read-only t) + (inhibit-modification-hooks t)) + (elfeed-display-document document point buffer)))) + +(defun elfeed-display-raw (buffer &optional encode) + (let ((data (buffer-substring (point) (point-max)))) + (unless (buffer-live-p buffer) + (error "Buffer %s doesn't exist" buffer)) + (with-current-buffer buffer + (let ((inhibit-read-only t)) + (insert data) + (condition-case nil + (decode-coding-region (point-min) (1+ (length data)) encode) + (coding-system-error nil))) + (goto-char (point-min))))) (cl-defun elfeed-insert-link (url &optional (content url)) "Insert a clickable hyperlink to URL titled CONTENT." @@ -244,12 +404,99 @@ Links are relative to BASE-URL if non-nil." do (elfeed-insert-link (car enclosure)) do (insert "\n")) (insert "\n") - (if content - (if (eq type 'html) - (elfeed-insert-html content base) - (insert content)) - (insert (propertize "(empty)\n" 'face 'italic))) - (goto-char (point-min)))) + (cond ((and elfeed-show-entry-retrieve-content-link + (or + ;; regexp + (and (stringp elfeed-show-entry-retrieve-content-link) + (string-match elfeed-show-entry-retrieve-content-link + link)) + ;; number of characters + (and (integerp elfeed-show-entry-retrieve-content-link) + (or (not content) + (<= (length content) elfeed-show-entry-retrieve-content-link))) + ;; list of either above + (and (listp elfeed-show-entry-retrieve-content-link) + (let ((retrieve-conditions elfeed-show-entry-retrieve-content-link) + condition + (result t)) + ;; Loop through until no conditions are left or found a result + (while (and result + (setq condition (pop retrieve-conditions))) + (when (or (and + ;; regexp + (stringp condition) + (string-match condition + link)) + ;; number of characters + (and (integerp condition) + (or (not content) + (<= (length content) + condition)))) + ;; Make while condition fail a result has been found + (setq result nil))) + ;; Invert result + (not result))) + ;; a true boolean (by this point it can only be true) + (booleanp elfeed-show-entry-retrieve-content-link) + nil)) + (eww-retrieve link #'elfeed-render (list link nil (current-buffer)))) + (content + ;; FIXME: Elfeed doesn't really differentiate between content and description. + ;; Some entries might claim to have content but they don't. + ;; Calling `eww-retrieve' on those entries without content should be enough but yeah.. + (let ((content-buffer (current-buffer)) + (elfeed-show-entry-retrieve-content-link-readability nil) + (content-type `(,(cond ((eq type 'html) + "text/html") + (t "text/plain")) + (char-set . nil)))) + (with-temp-buffer + (insert content) + (elfeed-render nil link nil content-buffer nil content-type)))) + (t + (insert (propertize "(empty)\n" 'face 'italic)) + (goto-char (point-min)))))) + +(defun elfeed-display-document (document &optional point buffer) + (unless (fboundp 'libxml-parse-html-region) + (error "This function requires Emacs to be compiled with libxml2")) + (setq buffer (or buffer (current-buffer))) + (unless (buffer-live-p buffer) + (error "Buffer %s doesn't exist" buffer)) + ;; There should be a better way to abort loading images + ;; asynchronously. + (setq url-queue nil) + (let ((url (when (eq (car document) 'base) + (alist-get 'href (cadr document))))) + (unless url + (error "Document is missing base URL")) + (with-current-buffer buffer + (setq bidi-paragraph-direction nil) + (let ((inhibit-read-only t) + (inhibit-modification-hooks t) + ;; Possibly set by the caller, e.g., `eww-render' which + ;; preserves the old URL #target before chasing redirects. + (shr-target-id (or shr-target-id + (url-target (url-generic-parse-url url))))) + (with-delayed-message (2 "Rendering HTML...") + (shr-insert-document document)) + (cond + (point + (goto-char point)) + (shr-target-id + (goto-char (point-min)) + (let ((match (text-property-search-forward + 'shr-target-id shr-target-id #'member))) + (when match + (goto-char (prop-match-beginning match))))) + (t + (goto-char (point-min)) + ;; Don't leave point inside forms, because the normal eww + ;; commands aren't available there. + (while (and (not (eobp)) + (get-text-property (point) 'eww-form)) + (forward-line 1))))) + (eww-size-text-inputs)))) (defun elfeed-show-refresh (&rest _) "Update the buffer to match the selected entry. @@ -258,6 +505,24 @@ Used as `revert-buffer-function'." (interactive) (funcall elfeed-show-refresh-function)) + +(defun elfeed-show-refresh-togge-readable () + "Toggle display of only the main \"readable\" parts of the current web page. +This command uses heuristics to find the parts of the web page that +contain the main textual portion, leaving out navigation menus and the +like. + +If called interactively, toggle the display of the readable parts. If +the prefix argument is positive, display the readable parts, and if it +is zero or negative, display the full page. + +If called from Lisp, toggle the display of the readable parts if ARG is +`toggle'. Display the readable parts if ARG is nil, omitted, or is a +positive number. Display the full page if ARG is a negative number." + (interactive "" elfeed-show-mode) + (setq elfeed-show-entry-readable (not elfeed-show-entry-readable)) + (call-interactively elfeed-show-refresh-function)) + (defcustom elfeed-show-unique-buffers nil "When non-nil, every entry buffer gets a unique name. This allows for displaying multiple show buffers at the same diff --git a/elfeed.el b/elfeed.el index 682d0da..26f0366 100644 --- a/elfeed.el +++ b/elfeed.el @@ -658,16 +658,21 @@ called interactively, SAVE is set to t." ;; New entry filtering (cl-defun elfeed-make-tagger - (&key feed-title feed-url entry-title entry-link after before - add remove callback) - "Create a function that adds or removes tags on matching entries. - -FEED-TITLE, FEED-URL, ENTRY-TITLE, and ENTRY-LINK are regular -expressions or a list (not ), which indicates a negative -match. AFTER and BEFORE are relative times (see -`elfeed-time-duration'). Entries must match all provided + (&key feed-title feed-url feed-author entry-title entry-link + entry-enclosure entry-content-type after before + feed-meta entry-meta add remove callback) + "Create a function that adds, removes tags or does CALLBACK on matching entries. + +FEED-TITLE, FEED-URL, FEED-AUTHOR, ENTRY-TITLE, ENTRY-LINK, +ENTRY-ENCLOSURE, ENTRY-CONTENT-TYPE +are regular expressions or a list \(not \), +which indicates a negative match. FEED-META and ENTRY-META are +a list of key and value where car is the key and cadr is value. +The key and value are matched against the respective meta's. +AFTER and BEFORE are relative times +\(see `elfeed-time-duration'\). Entries must match all provided expressions. If an entry matches, add tags ADD and remove tags -REMOVE. Call CALLBACK for each entry. +REMOVE. Examples, @@ -698,8 +703,13 @@ The returned function should be added to `elfeed-new-entry-hook'." (when (and (match feed-title (elfeed-feed-title feed)) (match feed-url (elfeed-feed-url feed)) + (match feed-author (elfeed-feed-author feed)) + (match (car feed-meta) (elfeed-meta feed (cadr feed-meta))) (match entry-title (elfeed-entry-title entry)) (match entry-link (elfeed-entry-link entry)) + (match entry-content-type (elfeed-entry-content-type entry)) + (match entry-enclosure (elfeed-entry-enclosures entry)) + (match (car entry-meta) (elfeed-meta entry (cadr entry-meta))) (or (not after-time) (> date (- (float-time) after-time))) (or (not before-time) (< date (- (float-time) before-time)))) (when add