// src/utils/enhance-content.js
  1  import emojifyText from './emojify-text';
  2  import mem from './mem';
  3  
// Scratch element shared by the text-node passes below: escaped/transformed
// markup is assigned to its innerHTML and the resulting child nodes are moved
// into the document being enhanced.
const fauxDiv = document.createElement('div');
// The only classes allowed to remain on <a> elements; every other class is
// stripped from links during enhancement.
const whitelistLinkClasses = ['u-url', 'mention', 'hashtag'];
  6  
  7  function _enhanceContent(content, opts = {}) {
  8    const { emojis, postEnhanceDOM = () => {} } = opts;
  9    let enhancedContent = content;
 10    const dom = document.createElement('div');
 11    dom.innerHTML = enhancedContent;
 12    const hasLink = /<a/i.test(enhancedContent);
 13    const hasCodeBlock = enhancedContent.indexOf('```') !== -1;
 14  
 15    if (hasLink) {
 16      // Add target="_blank" to all links with no target="_blank"
 17      // E.g. `note` in `account`
 18      const noTargetBlankLinks = Array.from(
 19        dom.querySelectorAll('a:not([target="_blank"])'),
 20      );
 21      noTargetBlankLinks.forEach((link) => {
 22        link.setAttribute('target', '_blank');
 23      });
 24  
 25      // Remove all classes except `u-url`, `mention`, `hashtag`
 26      const links = Array.from(dom.querySelectorAll('a[class]'));
 27      links.forEach((link) => {
 28        Array.from(link.classList).forEach((c) => {
 29          if (!whitelistLinkClasses.includes(c)) {
 30            link.classList.remove(c);
 31          }
 32        });
 33      });
 34    }
 35  
 36    // Add 'has-url-text' to all links that contains a url
 37    if (hasLink) {
 38      const links = Array.from(dom.querySelectorAll('a[href]'));
 39      links.forEach((link) => {
 40        if (/^https?:\/\//i.test(link.textContent.trim())) {
 41          link.classList.add('has-url-text');
 42        }
 43      });
 44    }
 45  
 46    // Spanify un-spanned mentions
 47    if (hasLink) {
 48      const links = Array.from(dom.querySelectorAll('a[href]'));
 49      const usernames = [];
 50      links.forEach((link) => {
 51        const text = link.innerText.trim();
 52        const hasChildren = link.querySelector('*');
 53        // If text looks like @username@domain, then it's a mention
 54        if (/^@[^@]+(@[^@]+)?$/g.test(text)) {
 55          // Only show @username
 56          const [_, username, domain] = text.split('@');
 57          if (!hasChildren) {
 58            if (
 59              !usernames.find(([u]) => u === username) ||
 60              usernames.find(([u, d]) => u === username && d === domain)
 61            ) {
 62              link.innerHTML = `@<span>${username}</span>`;
 63              usernames.push([username, domain]);
 64            } else {
 65              link.innerHTML = `@<span>${username}@${domain}</span>`;
 66            }
 67          }
 68          link.classList.add('mention');
 69        }
 70        // If text looks like #hashtag, then it's a hashtag
 71        if (/^#[^#]+$/g.test(text)) {
 72          if (!hasChildren) link.innerHTML = `#<span>${text.slice(1)}</span>`;
 73          link.classList.add('mention', 'hashtag');
 74        }
 75      });
 76    }
 77  
 78    // EMOJIS
 79    // ======
 80    // Convert :shortcode: to <img />
 81    let textNodes;
 82    if (enhancedContent.indexOf(':') !== -1) {
 83      textNodes = extractTextNodes(dom);
 84      textNodes.forEach((node) => {
 85        let html = node.nodeValue
 86          .replace(/&/g, '&amp;')
 87          .replace(/</g, '&lt;')
 88          .replace(/>/g, '&gt;');
 89        if (emojis) {
 90          html = emojifyText(html, emojis);
 91        }
 92        fauxDiv.innerHTML = html;
 93        const nodes = Array.from(fauxDiv.childNodes);
 94        node.replaceWith(...nodes);
 95      });
 96    }
 97  
 98    // CODE BLOCKS
 99    // ===========
100    // Convert ```code``` to <pre><code>code</code></pre>
101    if (hasCodeBlock) {
102      const blocks = Array.from(dom.querySelectorAll('p')).filter((p) =>
103        /^```[^]+```$/g.test(p.innerText.trim()),
104      );
105      blocks.forEach((block) => {
106        const pre = document.createElement('pre');
107        // Replace <br /> with newlines
108        block.querySelectorAll('br').forEach((br) => br.replaceWith('\n'));
109        pre.innerHTML = `<code>${block.innerHTML.trim()}</code>`;
110        block.replaceWith(pre);
111      });
112    }
113  
114    // Convert multi-paragraph code blocks to <pre><code>code</code></pre>
115    if (hasCodeBlock) {
116      const paragraphs = Array.from(dom.querySelectorAll('p'));
117      // Filter out paragraphs with ``` in beginning only
118      const codeBlocks = paragraphs.filter((p) => /^```/g.test(p.innerText));
119      // For each codeBlocks, get all paragraphs until the last paragraph with ``` at the end only
120      codeBlocks.forEach((block) => {
121        const nextParagraphs = [block];
122        let hasCodeBlock = false;
123        let currentBlock = block;
124        while (currentBlock.nextElementSibling) {
125          const next = currentBlock.nextElementSibling;
126          if (next && next.tagName === 'P') {
127            if (/```$/g.test(next.innerText)) {
128              nextParagraphs.push(next);
129              hasCodeBlock = true;
130              break;
131            } else {
132              nextParagraphs.push(next);
133            }
134          } else {
135            break;
136          }
137          currentBlock = next;
138        }
139        if (hasCodeBlock) {
140          const pre = document.createElement('pre');
141          nextParagraphs.forEach((p) => {
142            // Replace <br /> with newlines
143            p.querySelectorAll('br').forEach((br) => br.replaceWith('\n'));
144          });
145          const codeText = nextParagraphs.map((p) => p.innerHTML).join('\n\n');
146          pre.innerHTML = `<code tabindex="0">${codeText}</code>`;
147          block.replaceWith(pre);
148          nextParagraphs.forEach((p) => p.remove());
149        }
150      });
151    }
152  
153    // INLINE CODE
154    // ===========
155    // Convert `code` to <code>code</code>
156    if (enhancedContent.indexOf('`') !== -1) {
157      textNodes = extractTextNodes(dom);
158      textNodes.forEach((node) => {
159        let html = node.nodeValue
160          .replace(/&/g, '&amp;')
161          .replace(/</g, '&lt;')
162          .replace(/>/g, '&gt;');
163        if (/`[^`]+`/g.test(html)) {
164          html = html.replaceAll(/(`[^]+?`)/g, '<code>$1</code>');
165        }
166        fauxDiv.innerHTML = html;
167        const nodes = Array.from(fauxDiv.childNodes);
168        node.replaceWith(...nodes);
169      });
170    }
171  
172    // TWITTER USERNAMES
173    // =================
174    // Convert @username@twitter.com to <a href="https://twitter.com/username">@username@twitter.com</a>
175    if (/twitter\.com/i.test(enhancedContent)) {
176      textNodes = extractTextNodes(dom, {
177        rejectFilter: ['A'],
178      });
179      textNodes.forEach((node) => {
180        let html = node.nodeValue
181          .replace(/&/g, '&amp;')
182          .replace(/</g, '&lt;')
183          .replace(/>/g, '&gt;');
184        if (/@[a-zA-Z0-9_]+@twitter\.com/g.test(html)) {
185          html = html.replaceAll(
186            /(@([a-zA-Z0-9_]+)@twitter\.com)/g,
187            '<a href="https://twitter.com/$2" rel="nofollow noopener noreferrer" target="_blank">$1</a>',
188          );
189        }
190        fauxDiv.innerHTML = html;
191        const nodes = Array.from(fauxDiv.childNodes);
192        node.replaceWith(...nodes);
193      });
194    }
195  
196    // HASHTAG STUFFING
197    // ================
198    // Get the <p> that contains a lot of hashtags, add a class to it
199    if (enhancedContent.indexOf('#') !== -1) {
200      let prevIndex = null;
201      const hashtagStuffedParagraphs = Array.from(
202        dom.querySelectorAll('p'),
203      ).filter((p, index) => {
204        let hashtagCount = 0;
205        for (let i = 0; i < p.childNodes.length; i++) {
206          const node = p.childNodes[i];
207  
208          if (node.nodeType === Node.TEXT_NODE) {
209            const text = node.textContent.trim();
210            if (text !== '') {
211              return false;
212            }
213          } else if (node.tagName === 'BR') {
214            // Ignore <br />
215          } else if (node.tagName === 'A') {
216            const linkText = node.textContent.trim();
217            if (!linkText || !linkText.startsWith('#')) {
218              return false;
219            } else {
220              hashtagCount++;
221            }
222          } else {
223            return false;
224          }
225        }
226        // Only consider "stuffing" if:
227        // - there are more than 3 hashtags
228        // - there are more than 1 hashtag in adjacent paragraphs
229        if (hashtagCount > 3) {
230          prevIndex = index;
231          return true;
232        }
233        if (hashtagCount > 1 && prevIndex && index === prevIndex + 1) {
234          prevIndex = index;
235          return true;
236        }
237      });
238      if (hashtagStuffedParagraphs?.length) {
239        hashtagStuffedParagraphs.forEach((p) => {
240          p.classList.add('hashtag-stuffing');
241          p.title = p.innerText;
242        });
243      }
244    }
245  
246    if (postEnhanceDOM) {
247      postEnhanceDOM(dom); // mutate dom
248    }
249  
250    enhancedContent = dom.innerHTML;
251  
252    return enhancedContent;
253  }
// Public entry point: `_enhanceContent` wrapped by `mem` (from ./mem).
// NOTE(review): presumably a memoizer keyed on the arguments — confirm how
// the `opts` object (and its `postEnhanceDOM` callback) affects cache hits.
const enhanceContent = mem(_enhanceContent);
255  
// Upper-case element names whose text content must never be rewritten,
// listed by category.
const defaultRejectFilter = [
  // Document metadata
  'STYLE',
  // Image and multimedia
  'IMG', 'VIDEO', 'AUDIO', 'AREA', 'MAP', 'TRACK',
  // Embedded content
  'EMBED', 'IFRAME', 'OBJECT', 'PICTURE', 'PORTAL', 'SOURCE',
  // SVG and MathML
  'SVG', 'MATH',
  // Scripting
  'CANVAS', 'NOSCRIPT', 'SCRIPT',
  // Forms
  'INPUT', 'OPTION', 'TEXTAREA',
  // Web Components
  'SLOT', 'TEMPLATE',
];
// Same names as a `{ NAME: true }` lookup table for constant-time checks.
const defaultRejectFilterMap = defaultRejectFilter.reduce((lookup, name) => {
  lookup[name] = true;
  return lookup;
}, {});
/**
 * Collect, in document order, the text nodes under `dom` that are safe to
 * rewrite.
 *
 * Text anywhere inside an element listed in `defaultRejectFilterMap` or in
 * `opts.rejectFilter` is skipped — not only text whose direct parent is such
 * an element. Element names are compared upper-cased, because elements
 * outside the HTML namespace (e.g. <svg>, <math>) report a lower-case
 * `nodeName`.
 *
 * @param {Node} dom - Root of the subtree to scan.
 * @param {object} [opts]
 * @param {string[]} [opts.rejectFilter] - Extra upper-case element names
 *   whose subtrees are skipped (e.g. `['A']`).
 * @returns {Text[]} Matching text nodes.
 */
function extractTextNodes(dom, opts = {}) {
  const { rejectFilter } = opts;
  const isRejected = (element) => {
    const name = element.nodeName.toUpperCase();
    return (
      Boolean(defaultRejectFilterMap[name]) ||
      Boolean(rejectFilter?.includes(name))
    );
  };

  // The walker never applies its filter to the root itself
  if (isRejected(dom)) return [];

  // Elements must be visited too: FILTER_REJECT only prunes a subtree when
  // it is returned for the element, not for the text nodes inside it.
  const walker = document.createTreeWalker(
    dom,
    NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT,
    {
      acceptNode(node) {
        if (node.nodeType === Node.TEXT_NODE) {
          return NodeFilter.FILTER_ACCEPT;
        }
        return isRejected(node)
          ? NodeFilter.FILTER_REJECT
          : NodeFilter.FILTER_SKIP;
      },
    },
  );

  const textNodes = [];
  let node;
  while ((node = walker.nextNode())) {
    textNodes.push(node);
  }
  return textNodes;
}
318  
// Default export is the `mem`-wrapped enhancer, not the raw `_enhanceContent`.
export default enhanceContent;