thumb: indicate 1 month caching of generated output
[minimedit.git] / article.inc.php
index c6f3be9680d34b5d331cbf1c52c0c2f342add3d1..3c65a97d871b7fb8245023785a40f74646dc73ad 100644 (file)
@@ -16,10 +16,27 @@ function showdate($parts)
 
 class ArchiveArticle
 {
+       public $raw, $preface, $title, $body;
+       public $meta = [];
+
        function __construct($path)
        {
                $this->page = $path;
                $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
+               if (file_exists($this->page)) {
+                       $this->raw = file_get_contents($this->page);
+
+                       if (preg_match_all('{
+                               \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+                       }x', $this->raw, $meta)) {
+                               $matchlen = array_sum(array_map('strlen', $meta[0]));
+                               $this->raw = substr($this->raw, $matchlen); # delete matched contents
+                               $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+                       }
+
+                       @list ($this->preface, $this->title, $this->body) =
+                               preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+               }
        }
 
        function __get($col)
@@ -27,22 +44,10 @@ class ArchiveArticle
                return $this->$col = $this->$col();  # run method and cache
        }
 
-       function file()
-       {
-               if (!file_exists($this->page)) return;
-               return fopen($this->page, 'r');
-       }
-
-       function title()
-       {
-               return preg_replace('{<h2>(.*)</h2>\s*}', '\1', fgets($this->file));
-       }
-
        function safetitle()
        {
-               return trim(strip_tags($this->title));
+               return trim($this->meta['og:title'] ?? strip_tags($this->title));
        }
-
        function name()
        {
                return $this->safetitle ?: $this->link;
@@ -52,7 +57,6 @@ class ArchiveArticle
        {
                return filemtime($this->page);
        }
-
        function lastiso()
        {
                return date(DATE_ATOM, $this->last);
@@ -64,35 +68,49 @@ class ArchiveArticle
                array_shift($ymd);
                return $ymd;
        }
-
        function dateiso()
        {
                return implode('-', $this->dateparts()) . 'T12:00:00+02:00';
        }
-
        function date()
        {
                return showdate($this->dateparts);
        }
 
-       function body()
+       function story()
        {
-               $this->title;
-               $rest = fread($this->file, filesize($this->page));
                if ( preg_match('{
                        \n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
-               }x', $rest, $img, PREG_OFFSET_CAPTURE) ) {
+               }x', (string) $this->body, $img, PREG_OFFSET_CAPTURE) ) {
+                       # keep only the part before the matched divider; remember its image tag, if any
                        if (isset($img[1])) {
                                $this->img = $img[1][0];
                        }
-                       return substr($rest, 0, $img[0][1]);
+                       return substr($this->body, 0, $img[0][1]);
                }
-               return $rest;
+               return $this->body;  # no divider found: entire body (NULL if the page has no heading)
        }
 
        function teaser()
        {
-               if (preg_match('{<p>(.*?)</p>}s', $this->body, $bodyp)) {
+               if ($override = @$this->meta['og:description']) {
+                       # prefer specific page description if found in metadata
+                       return $override;
+               }
+
+               if (preg_match('{
+                       </h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
+               }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
+                       # fallback paragraph contents following the page header
+                       if ($bodyp[1][1] < 256) {
+                               return $bodyp[1][0];
+                       }
+               }
+
+               # starting paragraph for documents without title (assumed simple/partial)
+               if (strpos($this->raw, '<h2') === FALSE and preg_match('{
+                       \A <p> \s* (.*?) </p>
+               }sx', $this->raw, $bodyp)) {
                        return $bodyp[1];
                }
        }
@@ -100,17 +118,20 @@ class ArchiveArticle
        function img()
        {
                $this->img = NULL;
-               $this->body;
+               $this->story;  # read only for its side effect: story() stores a matched <img> tag in $this->img
                return $this->img;
        }
-
        function image()
        {
+               if ($override = @$this->meta['og:image']) {
+                       # prefer specific page image if found in metadata
+                       return $override;
+               }
+
                if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
                        return $src[1];
                }
        }
-
        function thumb($size = '300x')
        {
                if (!$this->image or $this->image[0] !== '/') return;