mirror of https://github.com/buster-so/buster.git
Added HTML entity decoding functions to Slack package and updated message retrieval to decode text in thread messages.
This commit is contained in:
parent
d21935613e
commit
01673d1c4a
|
@ -45,6 +45,7 @@ export * from './utils/validation-helpers';
|
|||
export * from './utils/message-formatter';
|
||||
export * from './utils/oauth-helpers';
|
||||
export { convertMarkdownToSlack } from './utils/markdown-to-slack';
|
||||
export { decodeHtmlEntities, decodeSlackMessageText } from './utils/html-entities';
|
||||
|
||||
// Reactions
|
||||
export { addReaction, removeReaction, getReactions } from './reactions';
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import { WebClient } from '@slack/web-api';
|
||||
import { decodeSlackMessageText } from './utils/html-entities';
|
||||
|
||||
// Define our own simple types to avoid complex Slack API type issues
|
||||
interface SlackBlock {
|
||||
|
@ -45,9 +46,12 @@ export async function getThreadMessages({
|
|||
inclusive: true, // Include the parent message
|
||||
});
|
||||
|
||||
// Cast the result to our SlackMessage type
|
||||
// Cast the result to our SlackMessage type and decode HTML entities
|
||||
const messages = result.messages || [];
|
||||
return messages as SlackMessage[];
|
||||
return messages.map((message) => ({
|
||||
...message,
|
||||
text: decodeSlackMessageText(message.text),
|
||||
})) as SlackMessage[];
|
||||
} catch (error) {
|
||||
console.error('Failed to get thread messages:', error);
|
||||
throw error;
|
||||
|
@ -81,7 +85,11 @@ export async function getMessage({
|
|||
});
|
||||
|
||||
if (result.messages && result.messages.length > 0) {
|
||||
return result.messages[0] as SlackMessage;
|
||||
const message = result.messages[0];
|
||||
return {
|
||||
...message,
|
||||
text: decodeSlackMessageText(message?.text),
|
||||
} as SlackMessage;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { decodeHtmlEntities, decodeSlackMessageText } from './html-entities';
|
||||
|
||||
// NOTE(review): the entity literals in this suite were reconstructed from the
// expected values and the section comments — confirm against the upstream commit.
describe('decodeHtmlEntities', () => {
  it('should decode common HTML entities', () => {
    expect(decodeHtmlEntities('&lt;')).toBe('<');
    expect(decodeHtmlEntities('&gt;')).toBe('>');
    expect(decodeHtmlEntities('&amp;')).toBe('&');
    expect(decodeHtmlEntities('&quot;')).toBe('"');
    expect(decodeHtmlEntities('&#39;')).toBe("'");
    expect(decodeHtmlEntities('&apos;')).toBe("'");
  });

  it('should decode multiple entities in a string', () => {
    expect(decodeHtmlEntities('&lt;div&gt;Hello &amp; World&lt;/div&gt;')).toBe(
      '<div>Hello & World</div>'
    );
    expect(decodeHtmlEntities('&quot;Hello&quot; &amp; &#39;World&#39;')).toBe(
      '"Hello" & \'World\''
    );
  });

  it('should decode numeric character references', () => {
    // Decimal references
    expect(decodeHtmlEntities('&#60;')).toBe('<');
    expect(decodeHtmlEntities('&#62;')).toBe('>');
    expect(decodeHtmlEntities('&#38;')).toBe('&');
    expect(decodeHtmlEntities('&#123;')).toBe('{');
    expect(decodeHtmlEntities('&#125;')).toBe('}');

    // Hexadecimal references
    expect(decodeHtmlEntities('&#x3C;')).toBe('<');
    expect(decodeHtmlEntities('&#x3E;')).toBe('>');
    expect(decodeHtmlEntities('&#x26;')).toBe('&');
    expect(decodeHtmlEntities('&#x7B;')).toBe('{');
    expect(decodeHtmlEntities('&#x7D;')).toBe('}');

    // Case insensitive hex
    expect(decodeHtmlEntities('&#X3C;')).toBe('<');
    expect(decodeHtmlEntities('&#x3e;')).toBe('>');
  });

  it('should handle special characters', () => {
    expect(decodeHtmlEntities('&nbsp;')).toBe(' ');
    expect(decodeHtmlEntities('&ndash;')).toBe('–');
    expect(decodeHtmlEntities('&mdash;')).toBe('—');
    expect(decodeHtmlEntities('&hellip;')).toBe('…');
    expect(decodeHtmlEntities('&ldquo;')).toBe('\u201C'); // Left double quotation mark
    expect(decodeHtmlEntities('&rdquo;')).toBe('\u201D'); // Right double quotation mark
  });

  it('should handle empty or undefined input', () => {
    expect(decodeHtmlEntities('')).toBe('');
    expect(decodeHtmlEntities(null as unknown as string)).toBe(null);
    expect(decodeHtmlEntities(undefined as unknown as string)).toBe(undefined);
  });

  it('should preserve text without entities', () => {
    expect(decodeHtmlEntities('Hello World')).toBe('Hello World');
    expect(decodeHtmlEntities('No entities here!')).toBe('No entities here!');
  });

  it('should handle repeated entities', () => {
    expect(decodeHtmlEntities('&amp;&amp;&amp;')).toBe('&&&');
    expect(decodeHtmlEntities('&lt;&lt;&lt;')).toBe('<<<');
  });

  it('should decode entities in code examples', () => {
    const input = 'if (x &lt; 10 &amp;&amp; y &gt; 5) { console.log(&quot;Hello&quot;); }';
    const expected = 'if (x < 10 && y > 5) { console.log("Hello"); }';
    expect(decodeHtmlEntities(input)).toBe(expected);
  });
});
|
||||
|
||||
// NOTE(review): the entity-encoded inputs in this suite were reconstructed from
// the expected values and the inline comments; which brackets were originally
// encoded is an assumption — confirm against the upstream commit.
describe('decodeSlackMessageText', () => {
  it('should decode HTML entities while preserving Slack user mentions', () => {
    const input = '&lt;@U123456&gt; said &quot;Hello&quot;';
    const expected = '<@U123456> said "Hello"';
    expect(decodeSlackMessageText(input)).toBe(expected);

    // When mention is already properly formatted
    const input2 = '<@U123456> said &quot;Hello&quot;';
    const expected2 = '<@U123456> said "Hello"';
    expect(decodeSlackMessageText(input2)).toBe(expected2);
  });

  it('should preserve Slack channel mentions', () => {
    const input = 'Check out &lt;#C123456&gt; for more info';
    const expected = 'Check out <#C123456> for more info';
    expect(decodeSlackMessageText(input)).toBe(expected);

    // When channel mention is already properly formatted
    const input2 = 'Check out <#C123456> for more info';
    const expected2 = 'Check out <#C123456> for more info';
    expect(decodeSlackMessageText(input2)).toBe(expected2);
  });

  it('should preserve Slack links', () => {
    const input = 'Visit &lt;https://example.com|our website&gt; for details';
    const expected = 'Visit <https://example.com|our website> for details';
    expect(decodeSlackMessageText(input)).toBe(expected);

    // When link is already properly formatted
    const input2 = 'Visit <https://example.com|our website> for details';
    const expected2 = 'Visit <https://example.com|our website> for details';
    expect(decodeSlackMessageText(input2)).toBe(expected2);
  });

  it('should preserve simple Slack URLs', () => {
    const input = 'Check &lt;https://example.com&gt;';
    const expected = 'Check <https://example.com>';
    expect(decodeSlackMessageText(input)).toBe(expected);

    // When URL is already properly formatted
    const input2 = 'Check <https://example.com>';
    const expected2 = 'Check <https://example.com>';
    expect(decodeSlackMessageText(input2)).toBe(expected2);
  });

  it('should decode entities in regular text while preserving Slack formatting', () => {
    const input = '<@U123456> wrote: &lt;div&gt;Hello &amp; welcome&lt;/div&gt; in <#C789012>';
    const expected = '<@U123456> wrote: <div>Hello & welcome</div> in <#C789012>';
    expect(decodeSlackMessageText(input)).toBe(expected);
  });

  it('should handle mixed content with code blocks', () => {
    const input =
      'Here&#39;s the code: if (x &lt; 10 &amp;&amp; y &gt; 5) { alert(&quot;Hi&quot;); }';
    const expected = 'Here\'s the code: if (x < 10 && y > 5) { alert("Hi"); }';
    expect(decodeSlackMessageText(input)).toBe(expected);
  });

  it('should handle undefined or empty input', () => {
    expect(decodeSlackMessageText(undefined)).toBe(undefined);
    expect(decodeSlackMessageText('')).toBe('');
    expect(decodeSlackMessageText(' ')).toBe(' ');
  });

  it('should handle complex Slack messages', () => {
    const input =
      '<@U123456> mentioned <@U789012> in <#C345678>: &quot;Check this <https://example.com|link> for the &lt;code&gt; example&quot;';
    const expected =
      '<@U123456> mentioned <@U789012> in <#C345678>: "Check this <https://example.com|link> for the <code> example"';
    expect(decodeSlackMessageText(input)).toBe(expected);
  });

  it('should handle messages with multiple entity types', () => {
    const input = 'Testing &amp; more: &lt;script&gt;alert(&#39;XSS&#39;)&lt;/script&gt;';
    const expected = "Testing & more: <script>alert('XSS')</script>";
    expect(decodeSlackMessageText(input)).toBe(expected);
  });
});
|
|
@ -0,0 +1,134 @@
|
|||
/**
|
||||
* Decode HTML entities from Slack messages
|
||||
* Slack API returns text with HTML entities encoded (e.g., &lt; for <, &gt; for >, &amp; for &)
|
||||
* This function decodes these entities back to their original characters
|
||||
*/
|
||||
|
||||
/**
 * Map of HTML entities to their decoded characters.
 * Based on common entities found in Slack messages.
 *
 * Lookups are exact and case-sensitive. Numeric references that are not listed
 * here are still decoded generically by `decodeHtmlEntities`.
 */
const HTML_ENTITIES: Record<string, string> = {
  '&lt;': '<',
  '&gt;': '>',
  '&amp;': '&',
  '&quot;': '"',
  '&#39;': "'",
  '&#x27;': "'",
  '&apos;': "'",
  '&#x2F;': '/',
  '&#47;': '/',
  '&#x60;': '`',
  '&#96;': '`',
  // NOTE(review): non-breaking-space entities decode to a plain space here,
  // not U+00A0 — confirm that this normalisation is intended.
  '&nbsp;': ' ',
  '&#160;': ' ',
  '&ndash;': '–',
  '&mdash;': '—',
  '&hellip;': '…',
  '&ldquo;': '\u201C',
  '&rdquo;': '\u201D',
  '&lsquo;': '\u2018',
  '&rsquo;': '\u2019',
};

/** Largest valid Unicode code point; larger numeric references are left untouched. */
const MAX_CODE_POINT = 0x10ffff;

/**
 * Single pattern covering every entity form this module decodes, built once at
 * module load instead of once per entity per call:
 *   group 1 - an entity listed in HTML_ENTITIES
 *   group 2 - digits of a decimal reference (&#123;)
 *   group 3 - digits of a hexadecimal reference (&#x7B; or &#X7B;)
 */
const ENTITY_PATTERN = new RegExp(
  `(${Object.keys(HTML_ENTITIES).map(escapeRegExp).join('|')})|&#(\\d+);|&#[xX]([0-9a-fA-F]+);`,
  'g'
);

/**
 * Decode HTML entities in a string.
 *
 * The text is scanned exactly once, so the result of one replacement is never
 * decoded a second time: `&amp;quot;` yields the literal text `&quot;`, not `"`.
 * Numeric references are decoded as full Unicode code points, so characters
 * outside the Basic Multilingual Plane (e.g. `&#128512;`) come out intact.
 * Unknown named entities and numeric references above U+10FFFF are kept as-is.
 *
 * @param text - The text containing HTML entities
 * @returns The decoded text; falsy input is returned unchanged
 */
export function decodeHtmlEntities(text: string): string {
  if (!text) {
    return text;
  }

  return text.replace(
    ENTITY_PATTERN,
    (match: string, named?: string, decimal?: string, hex?: string): string => {
      if (named !== undefined) {
        return HTML_ENTITIES[named] ?? match;
      }

      const codePoint =
        decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? '', 16);

      // String.fromCodePoint throws a RangeError for values above U+10FFFF, so
      // anything that is not a representable code point stays in encoded form.
      if (!Number.isInteger(codePoint) || codePoint > MAX_CODE_POINT) {
        return match;
      }
      return String.fromCodePoint(codePoint);
    }
  );
}

/**
 * Escape special regex characters in a string
 * @param string - The string to escape
 * @returns The escaped string safe for use in regex
 */
function escapeRegExp(string: string): string {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
||||
|
||||
/**
 * Slack control sequences that must pass through decoding untouched, tried in
 * this order at each position:
 *   <@USERID>      user mention
 *   <#CHANNELID>   channel mention
 *   <target|label> link (or any other sequence) with a display label
 *   <http(s)://…>  bare URL
 * The single capturing group makes `String.prototype.split` keep the matched
 * sequences in its output, at the odd indices.
 */
const SLACK_CONTROL_SEQUENCE = /(<@[A-Z0-9]+>|<#[A-Z0-9]+>|<[^>]+\|[^>]+>|<https?:\/\/[^>]+>)/;

/**
 * Decode HTML entities in Slack message text while preserving Slack-specific formatting.
 *
 * The text is split into Slack control sequences and the plain text between
 * them. Only the plain text is passed to `decodeHtmlEntities`; control
 * sequences are copied through byte-for-byte. No placeholder tokens are
 * substituted into the text, so message content can never collide with a
 * placeholder and `$` characters inside a protected link are never interpreted
 * as replacement patterns.
 *
 * @param slackText - The Slack message text with potential HTML entities
 * @returns The decoded text; falsy input (`undefined` or `''`) is returned unchanged
 */
export function decodeSlackMessageText(slackText: string | undefined): string | undefined {
  if (!slackText) {
    return slackText;
  }

  return slackText
    .split(SLACK_CONTROL_SEQUENCE)
    .map((segment, index) =>
      // Odd indices hold the captured control sequences; even indices hold plain text.
      index % 2 === 1 ? segment : decodeHtmlEntities(segment)
    )
    .join('');
}
|
Loading…
Reference in New Issue