page: extend teaser range of fallback paragraph
[minimedit.git] / article.inc.php
index d42e49823c96b2536613c806f5399194b7b6c414..53102bac4065b11ec7630897329e131561ce551d 100644 (file)
@@ -23,20 +23,26 @@ class ArchiveArticle
        {
                $this->page = preg_replace('{^\.(?:/|$)}', '', $path);
                $this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page);
-               if (file_exists($this->page)) {
-                       $this->raw = file_get_contents($this->page);
-
-                       if (preg_match_all('{
-                               \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
-                       }x', $this->raw, $meta)) {
-                               $matchlen = array_sum(array_map('strlen', $meta[0]));
-                               $this->raw = substr($this->raw, $matchlen); # delete matched contents
-                               $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
-                       }
+               $this->raw($this->page);
+       }
 
-                       @list ($this->preface, $this->title, $this->body) =
-                               preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+       function raw($page)  # load file $page into $this->raw and split it into meta, preface, title and body
+       {
+               if (!file_exists($page)) {
+                       return;  # missing file: nothing is assigned by this method
+               }
+               $this->raw = file_get_contents($page);
+
+               if (preg_match_all('{
+                       \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+               }x', $this->raw, $meta)) {  # \G chains matches, so only the run of <meta> tags at the very start counts
+                       $matchlen = array_sum(array_map('strlen', $meta[0]));  # combined byte length of all matched tags
+                       $this->raw = substr($this->raw, $matchlen); # delete matched contents
+                       $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
                }
+
+               @list ($this->preface, $this->title, $this->body) =  # @ silences the notices for missing parts when no <h2> exists
+                       preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);  # cut at the first <h2>; the s flag lets a heading span lines
        }
 
        function __get($col)
@@ -44,6 +50,30 @@ class ArchiveArticle
                return $this->$col = $this->$col();  # run method and cache
        }
 
+       function handler()  # walk up from $this->link to the nearest directory containing index.php
+       {
+               $path = $this->link;
+               $this->path = '';  # collects the trailing components stripped off $path so far
+               $this->restricted = FALSE;  # replaced by a directory path once a .private marker is seen
+               while (TRUE) {
+                       if (file_exists("$path/.private")) {
+                               $this->restricted = $path;  # a marker found further up overwrites one found below
+                       }
+
+                       if (file_exists("$path/index.php")) {
+                               return $path;  # found; $this->path holds the part of the link below this directory
+                       }
+
+                       $up = strrpos($path, '/');  # FALSE once no slash is left
+                       $this->path = substr($path, $up) . $this->path;  # NOTE(review): when $up is FALSE this relies on FALSE acting as offset 0 (all of $path is prepended) - confirm intended
+                       $path = substr($path, 0, $up);  # drop the last component
+                       if ($up === FALSE) {
+                               break;  # the slash-less top component was already checked above
+                       }
+               }
+               return;  # no index.php anywhere along the path: result is NULL
+       }
+
        function safetitle()
        {
                return trim($this->meta['og:title'] ?? strip_tags($this->title));
@@ -80,7 +110,7 @@ class ArchiveArticle
        function story()
        {
                if ( preg_match('{
-                       \n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
+                       (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
                }x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
                        # strip part after matching divider (image)
                        if (isset($img[1])) {
@@ -102,14 +132,14 @@ class ArchiveArticle
                        </h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
                }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
                        # fallback paragraph contents following the page header
-                       if ($bodyp[1][1] < 256) {
+                       if ($bodyp[1][1] < 512) {
                                return $bodyp[1][0];
                        }
                }
 
                # starting paragraph for documents without title (assumed simple/partial)
                if (strpos($this->raw, '<h2') === FALSE and preg_match('{
-                       \A <p> \s* (.*?) </p>
+                       \A (?: <div [^>]*> \s* )* <p> \s* (.*?) </p>
                }sx', $this->raw, $bodyp)) {
                        return $bodyp[1];
                }