From 5e1f646cbcc0cc4ac08faf76afe5becba4dfd4c6 Mon Sep 17 00:00:00 2001
From: Marvin W <git@larma.de>
Date: Thu, 9 Jan 2020 13:43:02 +0100
Subject: Another set of changes to URL detection

---
 main/src/ui/util/helper.vala | 77 +++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 34 deletions(-)

(limited to 'main/src/ui/util')

diff --git a/main/src/ui/util/helper.vala b/main/src/ui/util/helper.vala
index 940d5c9b..83f6b3dc 100644
--- a/main/src/ui/util/helper.vala
+++ b/main/src/ui/util/helper.vala
@@ -6,9 +6,10 @@ using Xmpp;
 
 namespace Dino.Ui.Util {
 
-private static Regex url_regex;
-private static Map<unichar, unichar> matching_chars;
-private const string[] allowed_schemes = {"http", "https", "ftp", "ftps", "irc", "ircs", "xmpp", "mailto", "sms", "smsto", "mms", "tel", "geo", "openpgp4fpr", "im", "news", "nntp", "sip", "ssh", "bitcoim", "sftp", "magnet", "vnc"};
+private static Regex URL_REGEX;
+private static Map<unichar, unichar> MATCHING_CHARS;
+private const unichar[] NON_TRAILING_CHARS = {'\'', '"', ',', '.', ';', '!', '?', '»', '”', '’', '`', '~', '‽'};
+private const string[] ALLOWED_SCHEMAS = {"http", "https", "ftp", "ftps", "irc", "ircs", "xmpp", "mailto", "sms", "smsto", "mms", "tel", "geo", "openpgp4fpr", "im", "news", "nntp", "sip", "ssh", "bitcoin", "sftp", "magnet", "vnc"};
 private const string[] tango_colors_light = {"FCE94F", "FCAF3E", "E9B96E", "8AE234", "729FCF", "AD7FA8", "EF2929"};
 private const string[] tango_colors_medium = {"EDD400", "F57900", "C17D11", "73D216", "3465A4", "75507B", "CC0000"};
 private const string[] material_colors_800 = {"D32F2F", "C2185B", "7B1FA2", "512DA8", "303F9F", "1976D2", "0288D1", "0097A7", "00796B", "388E3C", "689F38", "AFB42B", "FFA000", "F57C00", "E64A19", "5D4037"};
@@ -251,20 +252,20 @@ public static bool is_24h_format() {
 }
 
 public static Regex get_url_regex() {
-    if (url_regex == null) {
-        url_regex = /\b(((http|ftp)s?:\/\/|(ircs?|xmpp|mailto|sms|smsto|mms|tel|geo|openpgp4fpr|im|news|nntp|sip|ssh|bitcoin|sftp|magnet|vnc|urn):)([^\s,.;!?"'»”’]|[,.;!?"'»”’]\S)+)/;
+    if (URL_REGEX == null) {
+        URL_REGEX = /\b(((http|ftp)s?:\/\/|(ircs?|xmpp|mailto|sms|smsto|mms|tel|geo|openpgp4fpr|im|news|nntp|sip|ssh|bitcoin|sftp|magnet|vnc|urn):)\S+)/;
     }
-    return url_regex;
+    return URL_REGEX;
 }
 
 public static Map<unichar, unichar> get_matching_chars() {
-    if (matching_chars == null) {
-        matching_chars = new HashMap<unichar, unichar>();
-        matching_chars[")".get_char(0)] = "(".get_char(0);
-        matching_chars["]".get_char(0)] = "[".get_char(0);
-        matching_chars["}".get_char(0)] = "{".get_char(0);
+    if (MATCHING_CHARS == null) {
+        MATCHING_CHARS = new HashMap<unichar, unichar>();
+        MATCHING_CHARS[")".get_char(0)] = "(".get_char(0);
+        MATCHING_CHARS["]".get_char(0)] = "[".get_char(0);
+        MATCHING_CHARS["}".get_char(0)] = "{".get_char(0);
     }
-    return matching_chars;
+    return MATCHING_CHARS;
 }
 
 public static string parse_add_markup(string s_, string? highlight_word, bool parse_links, bool parse_text_markup, bool already_escaped_ = false) {
@@ -278,31 +279,39 @@ public static string parse_add_markup(string s_, string? highlight_word, bool pa
             int start, end;
             match_info.fetch_pos(0, out start, out end);
             string link = s[start:end];
-            Map<unichar, unichar> matching_chars = get_matching_chars();
-            unichar close_char;
-            int last_char_index = link.length;
-            while (link.get_prev_char(ref last_char_index, out close_char) && matching_chars.has_key(close_char)) {
-                unichar open_char = matching_chars[close_char];
-                unichar char;
-                int index = 0;
-                int open = 0, close = 0;
-                while (link.get_next_char(ref index, out char)) {
-                    if (char == open_char) {
-                        open++;
-                    } else if (char == close_char) {
-                        close++;
+            if (GLib.Uri.parse_scheme(link) in ALLOWED_SCHEMAS) {
+                Map<unichar, unichar> matching_chars = get_matching_chars();
+                unichar close_char;
+                int last_char_index = link.length;
+                while (link.get_prev_char(ref last_char_index, out close_char)) {
+                    if (matching_chars.has_key(close_char)) {
+                        unichar open_char = matching_chars[close_char];
+                        unichar char;
+                        int index = 0;
+                        int open = 0, close = 0;
+                        while (link.get_next_char(ref index, out char)) {
+                            if (char == open_char) {
+                                open++;
+                            } else if (char == close_char) {
+                                close++;
+                            }
+                        }
+                        if (close > open) {
+                            // Remove last char from url
+                            end -= close_char.to_string().length;
+                            link = s[start:end];
+                        } else {
+                            break;
+                        }
+                    } else if (close_char in NON_TRAILING_CHARS) {
+                        // Remove last char from url
+                        end -= close_char.to_string().length;
+                        link = s[start:end];
+                    } else {
+                        break;
                     }
                 }
-                if (close > open) {
-                    // Remove last char from url
-                    end -= close_char.to_string().length;
-                    link = s[start:end];
-                } else {
-                    break;
-                }
-            }
 
-            if (GLib.Uri.parse_scheme(link) in allowed_schemes) {
                 return parse_add_markup(s[0:start], highlight_word, parse_links, parse_text_markup, already_escaped) +
                         "<a href=\"" + Markup.escape_text(link) + "\">" + parse_add_markup(link, highlight_word, false, false, already_escaped) + "</a>" +
                         parse_add_markup(s[end:s.length], highlight_word, parse_links, parse_text_markup, already_escaped);
-- 
cgit v1.2.3-70-g09d2