nostr_types/types/
content.rs

1use super::{find_nostr_url_pos, NostrBech32, NostrUrl};
2use aho_corasick::AhoCorasick;
3use lazy_static::lazy_static;
4use linkify::{LinkFinder, LinkKind};
5use regex::Regex;
6
/// A half-open byte range (`start..end`) into a piece of content.
///
/// It plays the same role as `Range<usize>` or `linkify::Span`, but it is
/// `Copy`, can be shifted with [`Span::offset`], and carries none of the
/// `as_str()` / `kind()` accessors that `linkify::Span` has.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
    /// Byte index of the first byte covered by the span
    start: usize,
    /// Byte index one past the last byte covered by the span
    end: usize,
}

impl Span {
    /// Shift the whole span forward by `offset` bytes.
    ///
    /// Both ends move by the same amount, so the length is unchanged.
    pub fn offset(&mut self, offset: usize) {
        let Span { start, end } = *self;
        *self = Span {
            start: start + offset,
            end: end + offset,
        };
    }
}
23
24/// A segment of content
25#[derive(Clone, Debug, PartialEq, Eq)]
26pub enum ContentSegment {
27    /// A Nostr URL
28    NostrUrl(NostrUrl),
29
30    /// A reference to an event tag by index
31    TagReference(usize),
32
33    /// A hyperlink
34    Hyperlink(Span),
35
36    /// A hash tag
37    Hashtag(String),
38
39    /// Plain text
40    Plain(Span),
41}
42
43/// A sequence of content segments
44#[derive(Clone, Debug)]
45pub struct ShatteredContent {
46    /// The sequence of `ContentSegment`s
47    pub segments: Vec<ContentSegment>,
48
49    /// The original content (the allocated string)
50    /// `Range`s within segments refer to this
51    pub allocated: String,
52}
53
54impl ShatteredContent {
55    /// Break content into meaningful segments
56    ///
57    /// This avoids reallocation
58    pub fn new(content: String, replace_app_links: bool) -> ShatteredContent {
59        let content = if replace_app_links {
60            replace_urls_with_nostr(&content)
61        } else {
62            content
63        };
64
65        let segments = shatter_content_1(&content);
66
67        ShatteredContent {
68            segments,
69            allocated: content,
70        }
71    }
72
73    /// View a slice of the original content as specified in a Span
74    #[allow(clippy::string_slice)] // the Span is trusted
75    pub fn slice<'a>(&'a self, span: &Span) -> Option<&'a str> {
76        if self.allocated.is_char_boundary(span.start) && self.allocated.is_char_boundary(span.end)
77        {
78            Some(&self.allocated[span.start..span.end])
79        } else {
80            None
81        }
82    }
83}
84
85/// Break content into a linear sequence of `ContentSegment`s
86#[allow(clippy::string_slice)] // start/end from find_nostr_url_pos is trusted
87fn shatter_content_1(mut content: &str) -> Vec<ContentSegment> {
88    let mut segments: Vec<ContentSegment> = Vec::new();
89    let mut offset: usize = 0; // used to adjust Span ranges
90
91    // Pass 1 - `NostrUrl`s
92    while let Some((start, end)) = find_nostr_url_pos(content) {
93        let mut inner_segments = shatter_content_2(&content[..start]);
94        apply_offset(&mut inner_segments, offset);
95        segments.append(&mut inner_segments);
96
97        // The Nostr Bech32 itself
98        if let Some(nbech) = NostrBech32::try_from_string(&content[start + 6..end]) {
99            segments.push(ContentSegment::NostrUrl(NostrUrl(nbech)));
100        } else {
101            segments.push(ContentSegment::Plain(Span { start, end }));
102        }
103
104        offset += end;
105        content = &content[end..];
106    }
107
108    // The stuff after it
109    let mut inner_segments = shatter_content_2(content);
110    apply_offset(&mut inner_segments, offset);
111    segments.append(&mut inner_segments);
112
113    segments
114}
115
116// Pass 2 - `TagReference`s
117#[allow(clippy::string_slice)] // Regex positions are trusted
118fn shatter_content_2(content: &str) -> Vec<ContentSegment> {
119    lazy_static! {
120        static ref TAG_RE: Regex = Regex::new(r"(\#\[\d+\])").unwrap();
121    }
122
123    let mut segments: Vec<ContentSegment> = Vec::new();
124
125    let mut pos = 0;
126    for mat in TAG_RE.find_iter(content) {
127        let mut inner_segments = shatter_content_3(&content[pos..mat.start()]);
128        apply_offset(&mut inner_segments, pos);
129        segments.append(&mut inner_segments);
130
131        // If panics on unwrap, something is wrong with Regex.
132        let u: usize = content[mat.start() + 2..mat.end() - 1].parse().unwrap();
133        segments.push(ContentSegment::TagReference(u));
134        pos = mat.end();
135    }
136
137    let mut inner_segments = shatter_content_3(&content[pos..]);
138    apply_offset(&mut inner_segments, pos);
139    segments.append(&mut inner_segments);
140
141    segments
142}
143
144// Pass 3 - URLs
145#[allow(clippy::string_slice)]
146fn shatter_content_3(content: &str) -> Vec<ContentSegment> {
147    let mut segments: Vec<ContentSegment> = Vec::new();
148
149    for span in LinkFinder::new().kinds(&[LinkKind::Url]).spans(content) {
150        if span.kind().is_some() {
151            segments.push(ContentSegment::Hyperlink(Span {
152                start: span.start(),
153                end: span.end(),
154            }));
155        } else if !span.as_str().is_empty() {
156            let mut inner_segments = shatter_content_4(&content[span.start()..span.end()]);
157            apply_offset(&mut inner_segments, span.start());
158            segments.append(&mut inner_segments);
159        }
160    }
161
162    segments
163}
164
165// Pass 4 - Hashtags
166#[allow(clippy::string_slice)]
167fn shatter_content_4(content: &str) -> Vec<ContentSegment> {
168    lazy_static! {
169        static ref HTAG_RE: Regex =
170            Regex::new(r"(?ms)(?:^|\s)(#[\w\p{Extended_Pictographic}]+)\b").unwrap();
171    }
172
173    let mut segments: Vec<ContentSegment> = Vec::new();
174
175    let mut pos = 0;
176    for cap in HTAG_RE.captures_iter(content) {
177        let mat = cap.get(1).unwrap();
178        if mat.start() > pos {
179            segments.push(ContentSegment::Plain(Span {
180                start: pos,
181                end: mat.start(),
182            }));
183        }
184        segments.push(ContentSegment::Hashtag(
185            content[mat.start() + 1..mat.end()].to_owned(),
186        ));
187        pos = mat.end();
188    }
189
190    if pos < content.len() {
191        segments.push(ContentSegment::Plain(Span {
192            start: pos,
193            end: content.len(),
194        }));
195    }
196
197    segments
198}
199
200fn apply_offset(segments: &mut [ContentSegment], offset: usize) {
201    for segment in segments.iter_mut() {
202        match segment {
203            ContentSegment::Hyperlink(span) => span.offset(offset),
204            ContentSegment::Plain(span) => span.offset(offset),
205            _ => {}
206        }
207    }
208}
209
210fn replace_urls_with_nostr(content: &str) -> String {
211    const PATTERNS: &[(&str, &str)] = &[
212        ("https://njump.me/npub1", "nostr:npub1"),
213        ("https://njump.me/nprofile1", "nostr:nprofile1"),
214        ("https://njump.me/nevent1", "nostr:nevent1"),
215        ("https://njump.me/naddr1", "nostr:naddr1"),
216        ("https://primal.net/e/note1", "nostr:note1"),
217        ("https://primal.net/e/naddr1", "nostr:naddr1"),
218        ("https://primal.net/e/nevent1", "nostr:nevent1"),
219        ("https://primal.net/p/npub1", "nostr:npub1"),
220        ("https://primal.net/p/nprofile1", "nostr:nprofile1"),
221        ("https://nostrudel.ninja/#/u/npub1", "nostr:npub1"),
222        ("https://nostrudel.ninja/u/npub1", "nostr:npub1"),
223        ("https://yakihonne.com/article/naddr1", "nostr:naddr1"),
224        ("https://yakihonne.com/users/npub1", "nostr:npub1"),
225        ("https://damus.io/note1", "nostr:note1"),
226        ("https://damus.io/npub1", "nostr:npub1"),
227        ("https://damus.io/naddr1", "nostr:naddr1"),
228        ("https://damus.io/nevent1", "nostr:nevent1"),
229        ("https://damus.io/nprofile1", "nostr:nprofile1"),
230        ("https://listr.lol/npub1", "nostr:npub1"),
231        ("https://nostr.band/npub1", "nostr:npub1"),
232        ("https://zap.stream/naddr1", "nostr:naddr1"),
233        ("https://tunestr.io/naddr1", "nostr:naddr1"),
234        ("https://zap.cooking/recipe/naddr1", "nostr:naddr1"),
235        ("https://nostrnests.com/naddr1", "nostr:naddr1"),
236        ("https://nostr.com/nprofile1", "nostr:nprofile1"),
237        ("https://coracle.social/nprofile1", "nostr:nprofile1"),
238    ];
239
240    lazy_static! {
241        static ref INPUTS: Vec<&'static str> = PATTERNS.iter().map(|(input, _)| *input).collect();
242        static ref OUTPUTS: Vec<&'static str> =
243            PATTERNS.iter().map(|(_, output)| *output).collect();
244        static ref AHO: AhoCorasick = AhoCorasick::new(INPUTS.iter()).unwrap();
245    }
246
247    AHO.replace_all(content, &OUTPUTS)
248}
249
#[cfg(test)]
mod test {
    use super::*;

    /// Shatter a string slice, as `ShatteredContent::new` would an owned string
    fn shatter(text: &str, replace_app_links: bool) -> ShatteredContent {
        ShatteredContent::new(text.to_string(), replace_app_links)
    }

    // A tag reference, a nostr URL and a hyperlink in one note, followed by a
    // NIP-27 style note containing several nostr URLs.
    #[test]
    fn test_shatter_content() {
        let pieces = shatter(
            "My friend #[0]  wrote me this note: nostr:note10ttnuuvcs29y3k23gwrcurw2ksvgd7c2rrqlfx7urmt5m963vhss8nja90 and it might have referred to https://github.com/Giszmo/nostr.info/blob/master/assets/js/main.js",
            false,
        );
        assert_eq!(pieces.segments.len(), 6);
        assert!(matches!(
            pieces.segments.as_slice(),
            [
                ContentSegment::Plain(..),
                ContentSegment::TagReference(..),
                ContentSegment::Plain(..),
                ContentSegment::NostrUrl(..),
                ContentSegment::Plain(..),
                ContentSegment::Hyperlink(..),
            ]
        ));

        let pieces = shatter(
            r#"This is a test of NIP-27 posting support referencing this note nostr:nevent1qqsqqqq9wh98g4u6e480vyp6p4w3ux2cd0mxn2rssq0w5cscsgzp2ksprpmhxue69uhkzapwdehhxarjwahhy6mn9e3k7mf0qyt8wumn8ghj7etyv4hzumn0wd68ytnvv9hxgtcpremhxue69uhkummnw3ez6ur4vgh8wetvd3hhyer9wghxuet59uq3kamnwvaz7tmwdaehgu3wd45kketyd9kxwetj9e3k7mf0qy2hwumn8ghj7mn0wd68ytn00p68ytnyv4mz7qgnwaehxw309ahkvenrdpskjm3wwp6kytcpz4mhxue69uhhyetvv9ujuerpd46hxtnfduhsz9mhwden5te0wfjkccte9ehx7um5wghxyctwvshszxthwden5te0wfjkccte9eekummjwsh8xmmrd9skctcnmzajy and again without the url data nostr:note1qqqq2aw2w3te4n2w7cgr5r2arcv4s6lkdx58pqq7af3p3qsyz4dqns2935
And referencing this person nostr:npub1acg6thl5psv62405rljzkj8spesceyfz2c32udakc2ak0dmvfeyse9p35c and again as an nprofile nostr:nprofile1qqswuyd9ml6qcxd92h6pleptfrcqucvvjy39vg4wx7mv9wm8kakyujgprdmhxue69uhkummnw3ezumtfddjkg6tvvajhytnrdakj7qg7waehxw309ahx7um5wgkhqatz9emk2mrvdaexgetj9ehx2ap0qythwumn8ghj7un9d3shjtnwdaehgu3wd9hxvme0qyt8wumn8ghj7etyv4hzumn0wd68ytnvv9hxgtcpzdmhxue69uhk7enxvd5xz6tw9ec82c30qy2hwumn8ghj7mn0wd68ytn00p68ytnyv4mz7qgcwaehxw309ashgtnwdaehgunhdaexkuewvdhk6tczkvt9n all on the same damn line even (I think)."#,
            false,
        );
        assert_eq!(pieces.segments.len(), 9);
    }

    // Hashtags next to non-ASCII text, ending in a nostr URL that does not
    // parse and is expected to come out as plain text.
    #[test]
    fn test_shatter_content_2() {
        let pieces = shatter(
            "Ein wunderschönes langes Wochenende auf der #zitadelle2024 geht zu Ende...
🏰 #einundzwanzig
Hier einige Impressionen mit opsec gewährten Bildern.
Wonderful Long Weekend at a Zitadelle, Here Impressions opsec included
 nostr:npub1vwf2mytkyk22x2gcmr9d7k",
            false,
        );
        assert_eq!(pieces.segments.len(), 6);
        for index in [2, 4].iter().copied() {
            assert!(matches!(pieces.segments[index], ContentSegment::Plain(..)));
        }
        // Slicing the last span must not panic
        if let ContentSegment::Plain(span) = &pieces.segments[5] {
            let _slice = pieces.slice(span);
        }
    }

    // An app link is rewritten to a nostr URL when replacement is enabled
    #[test]
    fn test_shatter_content_3() {
        let pieces = shatter(
            "Check this out https://primal.net/e/note10ttnuuvcs29y3k23gwrcurw2ksvgd7c2rrqlfx7urmt5m963vhss8nja90",
            true,
        );
        assert_eq!(pieces.segments.len(), 2);
        assert!(matches!(pieces.segments[1], ContentSegment::NostrUrl(_)));
    }

    // Hashtags at the start, middle and end, with exact plain-text spans
    #[test]
    fn test_shatter_content_4() {
        let pieces = shatter("#happy this is crazy #mad #dog", true);
        for piece in pieces.segments.iter() {
            println!(">{:?}<", piece);
        }

        let expected = [
            ContentSegment::Hashtag("happy".to_owned()),
            ContentSegment::Plain(Span { start: 6, end: 21 }),
            ContentSegment::Hashtag("mad".to_owned()),
            ContentSegment::Plain(Span { start: 25, end: 26 }),
            ContentSegment::Hashtag("dog".to_owned()),
        ];
        assert_eq!(pieces.segments.len(), 5);
        for (actual, wanted) in pieces.segments.iter().zip(expected.iter()) {
            assert_eq!(actual, wanted);
        }
    }
}