From 01b861d28cdf2b8336ce48d73553f0c72df763e0 Mon Sep 17 00:00:00 2001 From: Andrew Conrad Date: Tue, 23 May 2017 00:50:14 -0500 Subject: [PATCH 1/3] Add partial support for formatting incoming HTML Enable the html_formatting var to use this. Currently supports bold, italics, and underlines with weechat attributes. Code, strikethroughs, quotes, and breaks/rules are supported with plain text decorating them. It would be reasonable to add support for colors and lists in the future. Unfortunately, some things will never be possible with the limitations of Weechat, but there may be ways to approximate them better. All unsupported HTML tags are simply stripped from the message. This mostly fixes #60. --- matrix.lua | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 5 deletions(-) diff --git a/matrix.lua b/matrix.lua index b79f500..96912ea 100644 --- a/matrix.lua +++ b/matrix.lua @@ -38,7 +38,6 @@ This script maps this as follows: Fix broken state after failed initial connect Fix parsing of multiple join messages Friendlier error message on bad user/password - Parse some HTML and turn into color/bold/etc Support weechat.look.prefix_same_nick ]] @@ -297,6 +296,130 @@ local function irc_formatting_to_weechat_color(s) return s end +local function get_diff_indices_from_string_x(x, y) + -- For html_formatting_and_body_to_weechat_color to extract HTML tags + -- Returns a table filled with a start and end index pair for each tag + local m = x:len() + local n = y:len() + local start = 0 + local memo = {} + -- Trim common leading characters, for extra speed + while start < m and start < n + and x:sub(start+1, start+1) == y:sub(start+1, start+1) do + start = start + 1 + end + for i = start, m + 1 do + memo[i] = {} + for j = start, n + 1 do + memo[i][j] = -1 + end + end + -- Determine length of the LCS + for i = start, m + 1 do + for j = start, n + 1 do + if i == start or j == start then + memo[i][j] = 0 + elseif x:sub(i,i) == y:sub(j,j) then + memo[i][j] = memo[i-1][j-1] + 1 + else + memo[i][j] = math.max(memo[i-1][j], memo[i][j-1]) + end + end + end + -- Work through the memo table to get the different parts in x + local indices = {} + while m > start or n > start do + if x:sub(m, m) == y:sub(n, n) then + m = m - 1 + n = n - 1 + elseif n > start and (m == start or memo[m][n-1] > memo[m-1][n]) then + n = n - 1 + else + table.insert(indices, m) + m = m - 1 + end + end + -- Filter out middle characters to only store start/end indices of each tag + local paired_indices = {} + local q = 1 + table.sort(indices) + while q < #indices do + local r = 1 + while (q + r <= #indices) and (indices[q + r] == indices[q] + r) do + r = r + 1 + end + table.insert(paired_indices, indices[q]) + table.insert(paired_indices, indices[q + r - 1]) + q = q + r + end + return paired_indices +end + +local function html_tag_to_weechat_attribute(tag) + -- TODO, deal with colors, lists, paragraphs, tables, preformatted text + -- Certain formattings, will never be accurate due to weechat limitations + local attribute + local adict = { + [""] = "bold", + [""] = "-bold", + [""] = "bold", + [""] = "-bold", + [""] = "italic", + [""] = "-italic", + [""] = "italic", + [""] = "-italic", + [""] = "underline", + [""] = "-underline", + ["

"] = "bold", + ["

"] = "-bold", + ["

"] = "bold", + ["

"] = "-bold", + ["

"] = "bold", + ["

"] = "-bold", + ["

"] = "bold", + ["

"] = "-bold", + ["
"] = "bold", + ["
"] = "-bold", + ["
"] = "bold", + ["
"] = "-bold", + } + local tdict = { + [""] = "```", + [""] = "```", + [""] = "~", + [""] = "~", + [""] = "~", + [""] = "~", + ["
"] = "> ", + ["
"] = "", + ["
"] = "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -", + ["
"] = "\n", + } + attribute = adict[tag] + if attribute then + return w.color(attribute) + end + attribute = tdict[tag] + if attribute then + return attribute + else + return "" + end +end + +local function html_formatting_and_body_to_weechat_color(html_body, unformatted_body) + local weechat_body = html_body + local diff_indices = get_diff_indices_from_string_x(html_body, unformatted_body) + for i = #diff_indices, 1, -2 do + local start = diff_indices[i-1] + local stop = diff_indices[i] + local weechat_attribute + weechat_attribute = html_tag_to_weechat_attribute(html_body:sub(start,stop)) + weechat_body = html_body:sub(1, start-1) .. weechat_attribute .. weechat_body:sub(stop+1) + end + return weechat_body +end + function matrix_unload() w.print('', 'matrix: Unloading') -- Clear/free olm memory if loaded @@ -2367,10 +2490,11 @@ function Room:ParseChunk(chunk, backlog, chunktype) -- luacheck: ignore 542 if content['msgtype'] == 'm.text' then - -- TODO - -- Parse HTML here: - -- content.format = 'org.matrix.custom.html' - -- fontent.formatted_body... + if content['format'] == 'org.matrix.custom.html' + and w.config_get_plugin('html_formatting') == 'on' then + local html_body = content['formatted_body'] + body = html_formatting_and_body_to_weechat_color(html_body, body) + end elseif content['msgtype'] == 'm.image' then local url = content['url'] if type(url) ~= 'string' then @@ -3294,6 +3418,7 @@ if w.register(SCRIPT_NAME, SCRIPT_AUTHOR, SCRIPT_VERSION, SCRIPT_LICENSE, SCRIPT autojoin_on_invite = {'on', 'Automatically join rooms you are invited to'}, typing_notices = {'on', 'Send typing notices when you type'}, local_echo = {'on', 'Print lines locally instead of waiting for return from server'}, + html_formatting = {'off', 'When recieved messages have HTML formatting, use it if possible. Experimental'}, debug = {'off', 'Print a lot of extra information to help with finding bugs and other problems.'}, encrypted_message_color = {'lightgreen', 'Print encrypted mesages with this color'}, --olm_secret = {'', 'Password used to secure olm stores'}, From 0c77c6a81606a0fa3d59d59c35b66c2e56bf499c Mon Sep 17 00:00:00 2001 From: Andrew Conrad Date: Mon, 29 May 2017 21:12:50 -0500 Subject: [PATCH 2/3] html_formatting: add support for

and

Also note in TODO comment that contiguous tags are currently broken. --- matrix.lua | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/matrix.lua b/matrix.lua index 96912ea..af974f6 100644 --- a/matrix.lua +++ b/matrix.lua @@ -356,7 +356,7 @@ local function get_diff_indices_from_string_x(x, y) end local function html_tag_to_weechat_attribute(tag) - -- TODO, deal with colors, lists, paragraphs, tables, preformatted text + -- TODO, deal with colors, lists, tables, preformatted text, contiguous tags -- Certain formattings, will never be accurate due to weechat limitations local attribute local adict = { @@ -394,6 +394,10 @@ local function html_tag_to_weechat_attribute(tag) [""] = "", ["
"] = "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -", ["
"] = "\n", + ["
"] = "", + ["
"] = "\n", + ["

"] = "", + ["

"] = "\n", } attribute = adict[tag] if attribute then From 05900c73d37074443b1dbf863411aad060d7aeb3 Mon Sep 17 00:00:00 2001 From: Andrew Conrad Date: Sat, 7 Oct 2017 14:33:01 -0500 Subject: [PATCH 3/3] html_formatting: Fix contiguous tags The most common formatting this allows is text with both italics and bold, but it fixes a very large amount of corner cases. --- matrix.lua | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/matrix.lua b/matrix.lua index af974f6..c07e79b 100644 --- a/matrix.lua +++ b/matrix.lua @@ -356,9 +356,9 @@ local function get_diff_indices_from_string_x(x, y) end local function html_tag_to_weechat_attribute(tag) - -- TODO, deal with colors, lists, tables, preformatted text, contiguous tags + -- TODO, deal with colors, lists, tables, preformatted text -- Certain formattings, will never be accurate due to weechat limitations - local attribute + local attribute = "" local adict = { [""] = "bold", [""] = "-bold", @@ -399,16 +399,20 @@ local function html_tag_to_weechat_attribute(tag) ["

"] = "", ["

"] = "\n", } - attribute = adict[tag] - if attribute then - return w.color(attribute) - end - attribute = tdict[tag] - if attribute then - return attribute - else - return "" + for s in string.gmatch(tag, "<.->") do + local attr + local t = "" .. s + attr = adict[t] + if attr then + attribute = attribute .. w.color(attr) + else + attr = tdict[t] + if attr then + attribute = attribute .. attr + end + end end + return attribute end local function html_formatting_and_body_to_weechat_color(html_body, unformatted_body)