mirror of
https://github.com/go-gitea/gitea.git
synced 2026-05-28 07:45:27 +00:00
chore(deps): update chroma, regexp2 v2, replace dimiro1/reply (#37858)
- Update `github.com/alecthomas/chroma/v2` to `v2.25.0`. - Migrate `github.com/dlclark/regexp2` to `/v2` (incorporates https://github.com/go-gitea/gitea/pull/37664); drop the renovate pin. - Replace the unmaintained `github.com/dimiro1/reply` (the last consumer of `regexp2` v1 in our own code) with a small built-in reply parser for incoming mail. Signed-off-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Claude (Opus 4.7) <noreply@anthropic.com> Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Giteabot <teabot@gitea.io> Co-authored-by: Nicolas <bircni@icloud.com>
This commit is contained in:
9
assets/go-licenses.json
generated
9
assets/go-licenses.json
generated
@@ -440,13 +440,8 @@
|
||||
"licenseText": "ISC License\n\nCopyright (c) 2012-2016 Dave Collins \u003cdave@davec.name\u003e\n\nPermission to use, copy, modify, and/or distribute this software for any\npurpose with or without fee is hereby granted, provided that the above\ncopyright notice and this permission notice appear in all copies.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES\nWITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF\nMERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR\nANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES\nWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN\nACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF\nOR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\n"
|
||||
},
|
||||
{
|
||||
"name": "github.com/dimiro1/reply",
|
||||
"path": "github.com/dimiro1/reply/LICENSE",
|
||||
"licenseText": "MIT License\n\nCopyright (c) Discourse\nCopyright (c) Claudemiro\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
|
||||
},
|
||||
{
|
||||
"name": "github.com/dlclark/regexp2",
|
||||
"path": "github.com/dlclark/regexp2/LICENSE",
|
||||
"name": "github.com/dlclark/regexp2/v2",
|
||||
"path": "github.com/dlclark/regexp2/v2/LICENSE",
|
||||
"licenseText": "The MIT License (MIT)\n\nCopyright (c) Doug Clark\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
|
||||
},
|
||||
{
|
||||
|
||||
6
go.mod
6
go.mod
@@ -23,7 +23,7 @@ require (
|
||||
github.com/ProtonMail/go-crypto v1.4.1
|
||||
github.com/PuerkitoBio/goquery v1.12.0
|
||||
github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.8.3
|
||||
github.com/alecthomas/chroma/v2 v2.24.1
|
||||
github.com/alecthomas/chroma/v2 v2.25.0
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.16
|
||||
github.com/aws/aws-sdk-go-v2/service/codecommit v1.33.14
|
||||
github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb
|
||||
@@ -33,8 +33,7 @@ require (
|
||||
github.com/caddyserver/certmagic v0.25.3
|
||||
github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20260309112543-12416315a635
|
||||
github.com/chi-middleware/proxy v1.1.1
|
||||
github.com/dimiro1/reply v0.0.0-20200315094148-d0136a4c9e21
|
||||
github.com/dlclark/regexp2 v1.12.0
|
||||
github.com/dlclark/regexp2/v2 v2.1.0
|
||||
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707
|
||||
github.com/dustin/go-humanize v1.0.1
|
||||
github.com/editorconfig/editorconfig-core-go/v2 v2.6.4
|
||||
@@ -178,6 +177,7 @@ require (
|
||||
github.com/cyphar/filepath-securejoin v0.6.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/davidmz/go-pageant v1.0.2 // indirect
|
||||
github.com/dlclark/regexp2 v1.12.0 // indirect
|
||||
github.com/emersion/go-sasl v0.0.0-20241020182733-b788ff22d5a6 // indirect
|
||||
github.com/fatih/color v1.19.0 // indirect
|
||||
github.com/fxamacker/cbor/v2 v2.9.2 // indirect
|
||||
|
||||
9
go.sum
9
go.sum
@@ -76,8 +76,8 @@ github.com/SaveTheRbtz/zstd-seekable-format-go/pkg v0.8.3/go.mod h1:bnXbvnI9Mfqd
|
||||
github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
|
||||
github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
|
||||
github.com/alecthomas/chroma/v2 v2.2.0/go.mod h1:vf4zrexSH54oEjJ7EdB65tGNHmH3pGZmVkgTP5RHvAs=
|
||||
github.com/alecthomas/chroma/v2 v2.24.1 h1:m5ffpfZbIb++k8AqFEKy9uVgY12xIQtBsQlc6DfZJQM=
|
||||
github.com/alecthomas/chroma/v2 v2.24.1/go.mod h1:l+ohZ9xRXIbGe7cIW+YZgOGbvuVLjMps/FYN/CwuabI=
|
||||
github.com/alecthomas/chroma/v2 v2.25.0 h1:DWkVlxrNpxPf+Qcfe04LBqUArxUiybK8ZQ9T7OFu68E=
|
||||
github.com/alecthomas/chroma/v2 v2.25.0/go.mod h1:+95AZrRWlpW9g6qXD7S7UdHviopsGP/kCIrtJcU3QoQ=
|
||||
github.com/alecthomas/repr v0.0.0-20220113201626-b1b626ac65ae/go.mod h1:2kn6fqh/zIyPLmm3ugklbEi5hg5wS435eygvNfaDQL8=
|
||||
github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
|
||||
github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
|
||||
@@ -236,13 +236,12 @@ github.com/davidmz/go-pageant v1.0.2 h1:bPblRCh5jGU+Uptpz6LgMZGD5hJoOt7otgT454Wv
|
||||
github.com/davidmz/go-pageant v1.0.2/go.mod h1:P2EDDnMqIwG5Rrp05dTRITj9z2zpGcD9efWSkTNKLIE=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||
github.com/dimiro1/reply v0.0.0-20200315094148-d0136a4c9e21 h1:PdsjTl0Cg+ZJgOx/CFV5NNgO1ThTreqdgKYiDCMHJwA=
|
||||
github.com/dimiro1/reply v0.0.0-20200315094148-d0136a4c9e21/go.mod h1:xJvkyD6Y2rZapGvPJLYo9dyx1s5dxBEDPa8T3YTuOk0=
|
||||
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
|
||||
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
|
||||
github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/dlclark/regexp2 v1.12.0 h1:0j4c5qQmnC6XOWNjP3PIXURXN2gWx76rd3KvgdPkCz8=
|
||||
github.com/dlclark/regexp2 v1.12.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
||||
github.com/dlclark/regexp2/v2 v2.1.0 h1:jHXRmHRZGbuQzDZjMlCAXOvQb75iv3HyLDzXGj5H1AY=
|
||||
github.com/dlclark/regexp2/v2 v2.1.0/go.mod h1:Bz5TMy5d8fPK0ximH0Yi9KvsRHNnvXqUx9XG6a4wB+I=
|
||||
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 h1:2tV76y6Q9BB+NEBasnqvs7e49aEBFI8ejC89PSnWH+4=
|
||||
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
|
||||
@@ -23,7 +23,7 @@ import (
|
||||
"gitea.dev/modules/timeutil"
|
||||
"gitea.dev/modules/util"
|
||||
|
||||
"github.com/dlclark/regexp2"
|
||||
"github.com/dlclark/regexp2/v2"
|
||||
"xorm.io/builder"
|
||||
)
|
||||
|
||||
|
||||
@@ -43,10 +43,6 @@
|
||||
"matchPackageNames": ["github.com/urfave/cli/v3"],
|
||||
"allowedVersions": "<3.6.2", // v3.6.2 breaks -c flag parsing in help commands
|
||||
},
|
||||
{
|
||||
"matchPackageNames": ["github.com/dlclark/regexp2"],
|
||||
"allowedVersions": "^1", // v2 fails to build on linux/386: https://github.com/dlclark/regexp2/issues/102
|
||||
},
|
||||
{
|
||||
"matchPackageNames": ["github.com/Azure/azure-sdk-for-go/sdk/azcore"],
|
||||
"allowedVersions": "<1.21.0", // v1.21.0+ uses API version unsupported by Azurite in CI
|
||||
|
||||
@@ -17,7 +17,6 @@ import (
|
||||
"gitea.dev/modules/setting"
|
||||
"gitea.dev/services/mailer/token"
|
||||
|
||||
"github.com/dimiro1/reply"
|
||||
"github.com/emersion/go-imap"
|
||||
"github.com/emersion/go-imap/client"
|
||||
"github.com/jhillyerd/enmime/v2"
|
||||
@@ -356,7 +355,7 @@ func getContentFromMailReader(env *enmime.Envelope) *MailContent {
|
||||
}
|
||||
|
||||
return &MailContent{
|
||||
Content: reply.FromText(env.Text),
|
||||
Content: extractReply(env.Text),
|
||||
Attachments: attachments,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,3 +150,56 @@ func TestGetContentFromMailReader(t *testing.T) {
|
||||
assert.Equal(t, "mail content without signature", content.Content)
|
||||
assert.Empty(t, content.Attachments)
|
||||
}
|
||||
|
||||
func TestExtractReply(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"plain text", "Email with only text.", "Email with only text."},
|
||||
{"crlf normalized", "line one\r\nline two\r\n", "line one\nline two"},
|
||||
{"trim blank lines", "\n\n\nactual reply\n\n\n", "actual reply"},
|
||||
{"signature delimiter", "the reply\n--\nJohn Doe\nAcme", "the reply"},
|
||||
{"rfc signature delimiter", "the reply\n-- \nJohn Doe", "the reply"},
|
||||
{"mobile signature", "My answer is yes.\n\nSent from my iPhone", "My answer is yes."},
|
||||
{"quote only kept", "> Email with only quote.", "> Email with only quote."},
|
||||
{"leading quote kept", "> This is a quote.\n\nAnd this is some text.", "> This is a quote.\n\nAnd this is some text."},
|
||||
{"trailing quote stripped", "My reply.\n\n> original line 1\n> original line 2", "My reply."},
|
||||
{"attribution and quote", "Looks good.\n\nOn Mon, Jan 1, 2024 John <j@x.com> wrote:\n> please review", "Looks good."},
|
||||
{"attribution without quote marks", "My reply.\n\nOn Wed, Sep 25, 2013, richard wrote:\noriginal text", "My reply."},
|
||||
{"original message separator", "Foo\n\n-------- Original Message --------\n\nTHE END.", "Foo"},
|
||||
{"outlook header block", "This is the actual reply.\n\nFrom: Some One <a@b.com>\nSent: Monday\nTo: Someone\nSubject: hi\n\nquoted body", "This is the actual reply."},
|
||||
{"french attribution", "C'est super !\n\nLe 4 janv. 2016 19:03, \"Neil\" <a@b.com> a écrit :\n> quoted", "C'est super !"},
|
||||
{"german attribution", "Hey :)\n\nAm 03.02.2016 3:35 schrieb Max <a@b.com>:\n> quoted", "Hey :)"},
|
||||
{"cyrillic wrote verb", "Yes.\n\n6 октября 2014 lidel написал:\n> quoted", "Yes."},
|
||||
{"localized signature", "My answer.\n\nEnvoyé depuis mon iPhone", "My answer."},
|
||||
{"swedish header block", "Hi everyone!\n\nFrån: Foo <a@b.com>\nSkickat: den 5 juni\nTill: x@y.com\nÄmne: hi\n\nbody", "Hi everyone!"},
|
||||
{"attribution only is empty", "On Mon, Jan 1, 2024 at 10:00 John <j@x.com> wrote:\n> please review", ""},
|
||||
{"prose ending in wrote kept", "Hi Bob,\nThanks for the report you wrote\nI'll fix it.", "Hi Bob,\nThanks for the report you wrote\nI'll fix it."},
|
||||
{"on with year and no time kept", "Hi,\nOn the 2024 roadmap we have three items.\nPlease review.", "Hi,\nOn the 2024 roadmap we have three items.\nPlease review."},
|
||||
{"date prose kept", "Notes:\n5 issues 2024 fixed at 9:15 today\nmore notes", "Notes:\n5 issues 2024 fixed at 9:15 today\nmore notes"},
|
||||
{"header needs from first", "Quick note:\nTo: which server?\nFrom: tests pass.\nThanks", "Quick note:\nTo: which server?\nFrom: tests pass.\nThanks"},
|
||||
{"indented header block", "Reply text.\n\n From: A <a@b.com>\n Sent: Monday\n To: x\n Subject: hi\n\nbody", "Reply text."},
|
||||
{"chinese signature", "回复内容\n\n發自我的iPhone", "回复内容"},
|
||||
{"japanese signature", "返信します\n\niPhoneから送信", "返信します"},
|
||||
{"chinese header block", "回复内容\n\n发件人:张三\n收件人:李四\n主题:你好\n\n原文", "回复内容"},
|
||||
{"japanese header block", "本文です\n\n差出人:山田\n宛先:田中\n件名:こんにちは\n\n原文", "本文です"},
|
||||
{"name-first attribution", "Okay.\n\nErlend <meta@x.com> schrieb am Di., 16. Aug. 2016\num 12:52 Uhr:\n> quoted", "Okay."},
|
||||
{"chinese attribution", "你好,谢谢回复。\n\n在 2024年1月1日,张三 <z@x.com> 写道:\n> 原始内容", "你好,谢谢回复。"},
|
||||
{"japanese attribution", "了解しました。\n\n田中さんは書きました:\n> 引用", "了解しました。"},
|
||||
{"korean attribution", "감사합니다.\n\n홍길동님이 작성:\n> 인용", "감사합니다."},
|
||||
{"email mention kept", "I asked Bob <bob@x.com> and he wrote back yes.\nSo we proceed.", "I asked Bob <bob@x.com> and he wrote back yes.\nSo we proceed."},
|
||||
{"trailing mailbox glyph", "My reply here.\n\nᐧ", "My reply here."},
|
||||
{"on with year and time prose kept", "On the 2024 roadmap we should meet at 10:00.\nI'll send invites.", "On the 2024 roadmap we should meet at 10:00.\nI'll send invites."},
|
||||
{"spanish year and time prose kept", "El informe del 2024 estará listo a las 10:00.\nGracias.", "El informe del 2024 estará listo a las 10:00.\nGracias."},
|
||||
{"chinese prose kept", "谢谢,已测试。\n发自我的内心的感谢", "谢谢,已测试。\n发自我的内心的感谢"},
|
||||
{"korean prose kept", "확인했습니다.\n이 문서는 회사에서 보냄", "확인했습니다.\n이 문서는 회사에서 보냄"},
|
||||
{"japanese prose kept", "了解しました。\n資料は会議から送信", "了解しました。\n資料は会議から送信"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
assert.Equal(t, c.expected, extractReply(c.input))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
137
services/mailer/incoming/reply.go
Normal file
137
services/mailer/incoming/reply.go
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package incoming
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"gitea.dev/modules/util"
|
||||
)
|
||||
|
||||
const (
	yearToken = `\b\d{4}\b`            // 4-digit year
	timeToken = `\b\d{1,2}[:.]\d{2}\b` // HH:MM or HH.MM
	// "wrote" verbs ending an attribution line; CJK ones are matched without a
	// preceding word-separator since those scripts don't space their words
	wroteVerbs    = `wrote|writes|schrieb|skrev|napisał|escreveu|escribió|написал|пише|a écrit`
	cjkWroteVerbs = `写道|寫道|書きました|작성`
	// device names anchoring CJK mobile signatures, so prose isn't mistaken for one
	cjkDevice = `iphone|ipad|ipod|android|galaxy|手机|手機|平板`
)

// forwarded-mail header fields across the common mail clients/locales. headerFromFields
// (the "From"-equivalents) must begin a block; headerFields is the full set allowed to
// follow. Matched as a prefix by headerLine, so adding a locale is a one-line change.
// All entries are lower case because headerLine lower-cases the line before comparing.
var (
	headerFromFields = []string{
		"from", "fra", "de", "von", "da", "van", "från", "expéditeur",
		"发件人", "寄件者", "差出人", "보낸사람",
	}
	headerFields = append([]string{
		"to", "cc", "bcc", "sent", "date", "subject", "reply-to",
		"til", "emne", "an", "betreff", "gesendet", "para", "assunto", "asunto",
		"risposta", "inviato", "oggetto", "destinataire", "objet", "répondre à",
		"aan", "onderwerp", "beantwoorden", "skickat", "till", "ämne",
		"收件人", "主题", "主旨", "主題", "收件者", "抄送", "日期", "宛先", "件名", "받는사람", "제목",
	}, headerFromFields...)
)

// patterns returns the regular expressions used to find where a reply ends. They are
// compiled on first use so the incoming-mail feature adds nothing to startup.
// Every pattern is anchored and meant to be matched against a single trimmed line.
var patterns = sync.OnceValue(func() (ret struct {
	signature, attribution, separator *regexp.Regexp
},
) {
	// "-- " delimiter and common mobile footers with frequent localizations. The CJK
	// forms require a device name so ordinary prose like "发自我的内心" or "会議から送信"
	// is not mistaken for a signature.
	ret.signature = regexp.MustCompile(`(?i)^(--|__|—` +
		`|sent (from|via|with) .+|get outlook for .+` +
		`|envoyé depuis mon .+|sendt fra min .+|von meinem .+|verzonden (met|vanaf) .+` +
		`|(發|发)自我的.*(` + cjkDevice + `).*` +
		`|.*(` + cjkDevice + `).*(から送信|에서 보냄|傳送|发送))$`)

	// attribution introducing quoted history: a line ending in a "wrote:" verb
	// (Latin/Cyrillic or CJK), a "Name <email> wrote" line, a lead word directly
	// followed by a day number or weekday plus a year and a time, or an ISO-date-led
	// line. The date phrasing, trailing colon and the email before the verb guard
	// against prose (so "On the 2024 roadmap … at 10:00" is not an attribution).
	// The trailing colon may be the ASCII ":" or the fullwidth colon U+FF1A; the
	// latter is spelled as \x{ff1a} so the two cannot be confused in the source.
	ret.attribution = regexp.MustCompile(`(?i)^>*\s*(` +
		`.*[\s">'](` + wroteVerbs + `)\s*[:\x{ff1a}]` +
		`|.*(` + cjkWroteVerbs + `)\s*[:\x{ff1a}]` +
		`|.*<\S+@\S+>\s+(` + wroteVerbs + `)\b.*` +
		`|(on|at|le|am|el|em|den|il|op|dnia|w dniu)\b[\s,]*(\d|(?:mon|tue|wed|thu|fri|sat|sun)\b).*` + yearToken + `.*` + timeToken + `.*` +
		`|\d{4}-\d{2}-\d{2}\b.*` + timeToken + `.*` +
		`)$`)

	// a dash/underscore rule line, or text fenced by dashes such as
	// "-------- Original Message --------" or "-----Mensaje original-----"
	ret.separator = regexp.MustCompile(`(?i)^\s*\*?\s*([-_]{5,}|-{2,}.+-{2,}|original message|forwarded message)\s*\*?\s*$`)
	return ret
})
|
||||
|
||||
// extractReply returns the user-written part of a plain-text email body, dropping
|
||||
// quoted history, the reply attribution, signatures and forwarded headers. It is a
|
||||
// slim, dependency-free reimplementation based on github.com/dimiro1/reply (MIT),
|
||||
// covering the common mail-client formats and languages; bottom posting and
|
||||
// forwarded bodies are not handled.
|
||||
func extractReply(text string) string {
|
||||
p := patterns()
|
||||
lines := strings.Split(util.NormalizeStringEOL(text), "\n")
|
||||
|
||||
// cut at the first line that begins quoted history, a signature or a header block
|
||||
for i := range lines {
|
||||
trimmed := strings.TrimSpace(lines[i])
|
||||
if p.signature.MatchString(trimmed) || p.attribution.MatchString(trimmed) ||
|
||||
p.separator.MatchString(trimmed) || headerBlock(trimmed, lines[i+1:]) {
|
||||
lines = lines[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// drop the trailing block of quoted/blank lines, unless the whole body is quoted
|
||||
end := len(lines)
|
||||
for end > 0 {
|
||||
// "ᐧ" is the trailing marker some mobile clients (Mailbox) append
|
||||
if t := strings.TrimSpace(lines[end-1]); t != "" && t != "ᐧ" && !strings.HasPrefix(t, ">") {
|
||||
break
|
||||
}
|
||||
end--
|
||||
}
|
||||
if end > 0 {
|
||||
lines = lines[:end]
|
||||
}
|
||||
|
||||
return strings.TrimSpace(strings.Join(lines, "\n"))
|
||||
}
|
||||
|
||||
// headerBlock reports whether a forwarded-mail header block starts here: the
|
||||
// (already-trimmed) first line is a "From" field and the next non-blank line is
|
||||
// another field, so a lone "Subject:" sentence is not a boundary.
|
||||
func headerBlock(first string, rest []string) bool {
|
||||
if !headerLine(first, headerFromFields) {
|
||||
return false
|
||||
}
|
||||
for _, next := range rest {
|
||||
if t := strings.TrimSpace(next); t != "" {
|
||||
return headerLine(t, headerFields)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// headerLine reports whether the already-trimmed line is a "Field:" header for one
// of fields. The line is lower-cased before the prefix comparison, so fields must
// be given in lower case. After the field name one of these separators is required:
//   - an ASCII colon followed by a space, so prose like "To:do this" is ignored;
//   - the fullwidth colon (U+FF1A), which needs no space;
//   - a bare ASCII colon, but only after a field name ending in a CJK character,
//     since those scripts don't put spaces between words.
func headerLine(line string, fields []string) bool {
	lower := strings.ToLower(line)
	for _, field := range fields {
		rest, ok := strings.CutPrefix(lower, field)
		if !ok {
			continue
		}
		switch {
		case strings.HasPrefix(rest, ": "), strings.HasPrefix(rest, "\uff1a"):
			return true
		case strings.HasPrefix(rest, ":") && endsInCJK(field):
			return true
		}
	}
	return false
}

// endsInCJK reports whether the last rune of s lies at or above U+2E80, the range
// holding the Han, kana and Hangul-syllable characters used by the CJK field names.
// Accented Latin letters are far below that threshold and do not qualify.
func endsInCJK(s string) bool {
	var last rune
	for _, r := range s {
		last = r
	}
	return last >= 0x2E80
}
|
||||
Reference in New Issue
Block a user