page: article method to find handler code
[minimedit.git] / article.inc.php
index a6a4ef800e9504f7440a5d34b42d774606c2fe96..d8ddf77b990b374d25586da8a01501ad030d21b3 100644 (file)
@@ -17,16 +17,32 @@ function showdate($parts)
 class ArchiveArticle
 {
        public $raw, $preface, $title, $body;
+       public $meta = [];
 
+       # Set up page and link names for the file at $path, then load its contents.
+       # $path: location of the page file, optionally prefixed with "./"
        function __construct($path)
        {
-               $this->page = $path;
-               $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
-               if (file_exists($this->page)) {
-                       $this->raw = file_get_contents($this->page);
-                       @list ($this->preface, $this->title, $this->body) =
-                               preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+               # drop a leading "./" (or a lone ".") to get a plain relative name
+               $this->page = preg_replace('{^\.(?:/|$)}', '', $path);
+               # link is the page name without its ".html" or "/index.html" suffix
+               $this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page);
+               # read the file (if any) into raw, meta, preface, title and body
+               $this->raw($this->page);
+       }
+
+       # Load the file $page and split its contents into parts:
+       # leading <meta> tags go to $this->meta, the remainder is kept as
+       # $this->raw and divided into preface, title and body at the first <h2>.
+       # Does nothing if the file does not exist.
+       function raw($page)
+       {
+               if (!file_exists($page)) {
+                       return;
+               }
+               $this->raw = file_get_contents($page);
+
+               # \G anchors every match to the end of the previous one, so only
+               # an unbroken run of <meta property content> tags at the very
+               # start of the file is taken
+               if (preg_match_all('{
+                       \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+               }x', $this->raw, $meta)) {
+                       # matches are contiguous from offset 0, so their summed length
+                       # (trailing whitespace included) is where the page proper begins
+                       $matchlen = array_sum(array_map('strlen', $meta[0]));
+                       $this->raw = substr($this->raw, $matchlen); # delete matched contents
+                       # a property given more than once keeps its last content value
+                       $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
                 }
+
+               # limit 2 with the captured delimiter gives [preface, title, body];
+               # without any <h2> only the preface exists, so @ silences the
+               # missing offsets and title and body end up NULL
+               @list ($this->preface, $this->title, $this->body) =
+                       preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
         }
 
        function __get($col)
@@ -34,9 +50,33 @@ class ArchiveArticle
                return $this->$col = $this->$col();  # run method and cache
        }
 
+       # Find the script responsible for this page by walking up from
+       # $this->link until a directory containing index.php is found.
+       # Returns that directory path, or NULL if no level has an index.php.
+       # Also sets:
+       #   $this->path        the part of the link stripped off below the handler
+       #   $this->restricted  FALSE, or the last visited (topmost) path
+       #                      containing a .private file
+       function handler()
+       {
+               $path = $this->link;
+               $this->path = '';
+               $this->restricted = FALSE;
+               while (TRUE) {
+                       # checked before index.php, so the handler directory itself counts;
+                       # each hit overwrites the previous one found deeper down
+                       if (file_exists("$path/.private")) {
+                               $this->restricted = $path;
+                       }
+
+                       if (file_exists("$path/index.php")) {
+                               return $path;
+                       }
+
+                       # move the last segment (with its leading slash) from $path
+                       # to the front of $this->path
+                       $up = strrpos($path, '/');
+                       # NOTE(review): when no slash is left $up is FALSE and is used as
+                       # an offset/length here before being tested; this presumably
+                       # relies on FALSE acting as 0 (whole $path prepended, $path
+                       # emptied) - confirm, would not work under strict_types
+                       $this->path = substr($path, $up) . $this->path;
+                       $path = substr($path, 0, $up);
+                       if ($up === FALSE) {
+                               break;
+                       }
+               }
+               # top level reached without finding any index.php
+               return;
+       }
+
+       # Plain text page title, trimmed of surrounding whitespace.
+       # Uses the og:title metadata value when present,
+       # otherwise the <h2> title with any markup stripped.
        function safetitle()
        {
-               return trim(strip_tags($this->title));
+               return trim($this->meta['og:title'] ?? strip_tags($this->title));
        }
        function name()
        {
@@ -80,14 +120,12 @@ class ArchiveArticle
                }
                return $this->body;
        }
+
        function teaser()
        {
-               if (preg_match('{
-                       <meta \s+ name="description" [^>]* content="([^">]*)"
-               }x', $this->preface, $meta)) {
-                       # prefer specific page description if found (assume before title)
-                       #TODO: strip from body contents
-                       return $meta[1];
+               if ($override = @$this->meta['og:description']) {
+                       # prefer specific page description if found in metadata
+                       return $override;
                }
 
                if (preg_match('{
@@ -101,7 +139,7 @@ class ArchiveArticle
 
                # starting paragraph for documents without title (assumed simple/partial)
                if (strpos($this->raw, '<h2') === FALSE and preg_match('{
-                       \A <p> \s* (.*?) </p>
+                       \A (?: <div [^>]*> \s* )* <p> \s* (.*?) </p>
                }sx', $this->raw, $bodyp)) {
                        return $bodyp[1];
                }
@@ -115,6 +153,11 @@ class ArchiveArticle
        }
        function image()
        {
+               if ($override = @$this->meta['og:image']) {
+                       # prefer specific page image if found in metadata
+                       return $override;
+               }
+
                if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
                        return $src[1];
                }
@@ -128,3 +171,41 @@ class ArchiveArticle
                );
        }
 }
+
+# Finds article pages (*.html files) in a directory tree.
+#
+# Hidden entries, symlinks, partial *.inc.html files and any directory
+# containing a .gitignore file are left out of the results.
+class PageSearch
+{
+       # filtered recursive iterator over the search root; declared up front
+       # because creating undeclared (dynamic) properties is deprecated in PHP 8.2
+       public $iterator;
+
+       # $path: directory to search, defaults to the current working directory
+       function __construct($path = '.')
+       {
+               $this->iterator = new RecursiveCallbackFilterIterator(
+                       new RecursiveDirectoryIterator($path),
+                       function ($current) {
+                               if ($current->getFilename()[0] === '.') {
+                                       # skip hidden files and directories (also . and ..)
+                                       return FALSE;
+                               }
+                               if ($current->isLink()) {
+                                       # ignore symlinks, original contents only
+                                       return FALSE;
+                               }
+                               if ($current->isDir()) {
+                                       # descend unless the directory has its own .gitignore
+                                       return !file_exists("$current/.gitignore");
+                               }
+                               # match **/*.html except *.inc.html; strict boolean result
+                               return preg_match('/(?<!\.inc)\.html$/', $current->getFilename()) === 1;
+                       }
+               );
+       }
+
+       # Returns all pages found as [file path => link], ordered by link.
+       # The link is the file path with a trailing /index.html removed.
+       function files()
+       {
+               # order alphabetically by link
+               $dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator));
+               array_walk($dir, function (&$row, $name) {
+                       # prepare values for sorting (directory index first)
+                       $row = preg_replace('{/index\.html$}', '', $name);
+               });
+               asort($dir);
+               return $dir;
+       }
+}