// mautrix-whatsapp - A Matrix-WhatsApp puppeting bridge. // Copyright (C) 2024 Tulir Asokan // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package msgconv import ( "context" "fmt" "html" "regexp" "strconv" "strings" "go.mau.fi/whatsmeow/proto/waE2E" "maunium.net/go/mautrix/bridgev2" "maunium.net/go/mautrix/event" ) func (mc *MessageConverter) convertTextMessage(ctx context.Context, msg *waE2E.Message) (part *bridgev2.ConvertedMessagePart, contextInfo *waE2E.ContextInfo) { part = &bridgev2.ConvertedMessagePart{ Type: event.EventMessage, Content: &event.MessageEventContent{ MsgType: event.MsgText, Body: msg.GetConversation(), }, } if len(msg.GetExtendedTextMessage().GetText()) > 0 { part.Content.Body = msg.GetExtendedTextMessage().GetText() } contextInfo = msg.GetExtendedTextMessage().GetContextInfo() mc.parseFormatting(part.Content, false, false) part.Content.BeeperLinkPreviews = mc.convertURLPreviewToBeeper(ctx, msg.GetExtendedTextMessage()) return } func (mc *MessageConverter) parseFormatting(content *event.MessageEventContent, allowInlineURL, forceHTML bool) { parsedHTML := parseWAFormattingToHTML(content.Body, allowInlineURL) if forceHTML || parsedHTML != event.TextToHTML(content.Body) { content.FormattedBody = parsedHTML content.Format = event.FormatHTML } } var italicRegex = regexp.MustCompile(`([\s>~*]|^)_(.+?)_([^a-zA-Z\d]|$)`) var boldRegex = regexp.MustCompile(`([\s>_~]|^)\*(.+?)\*([^a-zA-Z\d]|$)`) var strikethroughRegex = regexp.MustCompile(`([\s>_*]|^)~(.+?)~([^a-zA-Z\d]|$)`) var inlineCodeRegex = regexp.MustCompile("([\\s>_*~]|^)`(.+?)`([^a-zA-Z\\d]|$)") var inlineURLRegex = regexp.MustCompile(`\[(.+?)]\((.+?)\)`) var orderedListItemRegex = regexp.MustCompile(`^(\d{1,2})\. `) var waReplString = map[*regexp.Regexp]string{ italicRegex: "$1$2$3", boldRegex: "$1$2$3", strikethroughRegex: "$1$2$3", inlineCodeRegex: "$1$2$3", } func parseWASubFormattingLineToHTML(text string, allowInlineURL bool) string { text = html.EscapeString(text) for regex, replacement := range waReplString { text = regex.ReplaceAllString(text, replacement) } if allowInlineURL { text = inlineURLRegex.ReplaceAllStringFunc(text, func(s string) string { groups := inlineURLRegex.FindStringSubmatch(s) return fmt.Sprintf(`%s`, groups[2], groups[1]) }) } return text } func parseWASubFormattingToHTML(text string, allowInlineURL bool, output *strings.Builder) { lines := strings.Split(text, "\n") orderedListIdx := -1 inBulletedList := false wasBlockQuote := false for i, line := range lines { if i != 0 && orderedListIdx < 0 && !inBulletedList && !wasBlockQuote { output.WriteString("
") } wasBlockQuote = false if strings.HasPrefix(line, "* ") || strings.HasPrefix(line, "- ") { if orderedListIdx >= 0 { orderedListIdx = -1 output.WriteString("") } if !inBulletedList { output.WriteString("") inBulletedList = false } newIndex, _ := strconv.Atoi(match[1]) if orderedListIdx < 0 { if newIndex != 1 { _, _ = fmt.Fprintf(output, `
    `, match[1]) } else { output.WriteString("
      ") } orderedListIdx = newIndex - 1 } parsedLine := parseWASubFormattingLineToHTML(strings.TrimPrefix(line, match[0]), allowInlineURL) if orderedListIdx+1 != newIndex { _, _ = fmt.Fprintf(output, `
    1. %s
    2. `, match[1], parsedLine) } else { _, _ = fmt.Fprintf(output, "
    3. %s
    4. ", parsedLine) } orderedListIdx = newIndex } else if strings.HasPrefix(line, "> ") { if orderedListIdx >= 0 { orderedListIdx = -1 output.WriteString("
    ") } else if inBulletedList { output.WriteString("") inBulletedList = false } _, _ = fmt.Fprintf(output, "
    %s
    ", parseWASubFormattingLineToHTML(line[2:], allowInlineURL)) wasBlockQuote = true } else { if orderedListIdx >= 0 { orderedListIdx = -1 output.WriteString("
") } else if inBulletedList { output.WriteString("") inBulletedList = false } output.WriteString(parseWASubFormattingLineToHTML(line, allowInlineURL)) } } if orderedListIdx >= 0 { output.WriteString("") } else if inBulletedList { output.WriteString("") } } func parseWAFormattingToHTML(text string, allowInlineURL bool) string { var output strings.Builder codeBlockPtr := 0 for { relativeStartIdx := strings.Index(text[codeBlockPtr:], "```") if relativeStartIdx < 0 { break } absStartIdx := codeBlockPtr + relativeStartIdx relativeEndIdx := strings.Index(text[absStartIdx+3:], "```") if relativeEndIdx < 0 { break } absEndIdx := absStartIdx + 3 + relativeEndIdx + 3 // Don't allow code blocks without content, but check for another ``` in case it's a code block containing ```. // No need to check more than once because there'll always be at least ``` in the code block after the second try. if strings.TrimSpace(text[absStartIdx+3:absEndIdx-3]) == "" { relativeEndIdx = strings.Index(text[absEndIdx:], "```") if relativeEndIdx < 0 { break } absEndIdx += relativeEndIdx + 3 } prefix := text[codeBlockPtr:absStartIdx] content := text[absStartIdx+3 : absEndIdx-3] codeBlockPtr = absEndIdx if prefix != "" { parseWASubFormattingToHTML(prefix, allowInlineURL, &output) } if strings.ContainsRune(content, '\n') { _, _ = fmt.Fprintf(&output, "
%s
", html.EscapeString(content)) } else { _, _ = fmt.Fprintf(&output, "%s", html.EscapeString(content)) } } if codeBlockPtr < len(text) { parseWASubFormattingToHTML(text[codeBlockPtr:], allowInlineURL, &output) } return output.String() }