From f2df190a6c4a0128eb351bc398cffd0edf8d6096 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sun, 17 May 2020 05:17:38 +0200 Subject: [PATCH] article: find teaser paragraph from variable offset Simplify matching by starting at optional first </h1>. No longer ignores (short) preceding titles such as on Lijtweg home; more reliable otherwise. --- article.inc.php | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/article.inc.php b/article.inc.php index dd3d5b2..dbc545f 100644 --- a/article.inc.php +++ b/article.inc.php @@ -128,19 +128,12 @@ class ArchiveArticle return $override; } + # paragraph contents following the page header if any + $offset = strpos($this->raw, ''); + $offset = $offset ? $offset + 5 : 0; if (preg_match('{ - (?: \s+ | | ]*> )*

\s* (.*?)

- }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) { - # fallback paragraph contents following the page header - if ($bodyp[1][1] < 512) { - return $bodyp[1][0]; - } - } - - # starting paragraph for documents without title (assumed simple/partial) - if (strpos($this->raw, ']*> \s* )*

\s* (.*?)

- }sx', $this->raw, $bodyp)) { + \G (?: \s+ | |
]*> )*

\s* (.*?)

+ }sx', $this->raw, $bodyp, 0, $offset)) { return $bodyp[1]; } } -- 2.30.0