page: ignore asides in article body (cq title, teaser)
author: Mischa POSLAWSKY <perl@shiar.org>
Sun, 29 Nov 2020 03:07:33 +0000 (04:07 +0100)
committer: Mischa POSLAWSKY <perl@shiar.org>
Sat, 5 Dec 2020 00:41:21 +0000 (01:41 +0100)
Prepare the significant page contents in the body attribute, leaving raw untouched.
Assume body will be used for summary text, so the stripped parts are no longer kept in a preface attribute.

Fixes preceding aside becoming page metadata on lijtweg.nl/doc/regels.

article.inc.php

index 98cd475b22194095fca6924c9ef4570b6eaed041..6c3ca935be15bf0318a377564ba8d136b4f8239b 100644 (file)
@@ -16,7 +16,7 @@ function showdate($parts)
 
 class ArchiveArticle
 {
-       public $raw, $preface, $title, $body;
+       public $raw, $title, $body; # file contents
        public $meta = [];
 
        function __construct($path)
@@ -41,8 +41,11 @@ class ArchiveArticle
                        $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
                }
 
-               @list ($this->preface, $this->title, $this->body) =
-                       preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+               // find significant contents
+               $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+               if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+                       list (, $this->title, $this->body) = $titlematch;
+               }
        }
 
        function __get($col)
@@ -129,11 +132,9 @@ class ArchiveArticle
                }
 
                # paragraph contents following the page header if any
-               $offset = strpos($this->raw, '</h2>');
-               $offset = $offset ? $offset + 5 : 0;
                if (preg_match('{
-                       \G (?> \s+ | <aside\b.*?</aside> | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
-               }sx', $this->raw, $bodyp, 0, $offset)) {
+                       \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+               }sx', $this->body, $bodyp, 0)) {
                        return $bodyp[1];
                }
        }