X-Git-Url: http://git.shiar.nl/minimedit.git/blobdiff_plain/f0a0679f7ec71d6ae4969d39da4d6be84147fcb4..053009b156c6f814fb405c8575bb294aab512627:/article.inc.php diff --git a/article.inc.php b/article.inc.php index e3a3ee8..6fcb640 100644 --- a/article.inc.php +++ b/article.inc.php @@ -17,13 +17,23 @@ function showdate($parts) class ArchiveArticle { public $raw, $preface, $title, $body; + public $meta = []; function __construct($path) { - $this->page = $path; - $this->link = preg_replace('{(?:/index)?\.html$}', '', $path); + $this->page = preg_replace('{^\.(?:/|$)}', '', $path); + $this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page); if (file_exists($this->page)) { $this->raw = file_get_contents($this->page); + + if (preg_match_all('{ + \G \s* + }x', $this->raw, $meta)) { + $matchlen = array_sum(array_map('strlen', $meta[0])); + $this->raw = substr($this->raw, $matchlen); # delete matched contents + $this->meta = array_combine($meta[1], $meta[2]); # [property => content] + } + @list ($this->preface, $this->title, $this->body) = preg_split('{

(.*?)

\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE); } @@ -36,7 +46,7 @@ class ArchiveArticle function safetitle() { - return trim(strip_tags($this->title)); + return trim($this->meta['og:title'] ?? strip_tags($this->title)); } function name() { @@ -80,23 +90,29 @@ class ArchiveArticle } return $this->body; } + function teaser() { - if (preg_match('{ - ]* content="([^">]*)" - }x', $this->preface, $meta)) { - # prefer specific page description if found (assume before title) - #TODO: strip from body contents - return $meta[1]; + if ($override = @$this->meta['og:description']) { + # prefer specific page description if found in metadata + return $override; } + if (preg_match('{ - (?: \s+ | | ]*> )*

(.*?)

+ (?: \s+ | | ]*> )*

\s* (.*?)

}sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) { # fallback paragraph contents following the page header if ($bodyp[1][1] < 256) { return $bodyp[1][0]; } } + + # starting paragraph for documents without title (assumed simple/partial) + if (strpos($this->raw, ' \s* (.*?)

+ }sx', $this->raw, $bodyp)) { + return $bodyp[1]; + } } function img() @@ -107,6 +123,11 @@ class ArchiveArticle } function image() { + if ($override = @$this->meta['og:image']) { + # prefer specific page image if found in metadata + return $override; + } + if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) { return $src[1]; } @@ -120,3 +141,38 @@ class ArchiveArticle ); } } + +class PageSearch +{ + function __construct($path = '.') + { + $this->iterator = new RecursiveCallbackFilterIterator( + new RecursiveDirectoryIterator($path), + function ($current) { + if ($current->getFilename()[0] === '.') { + # skip hidden files and directories + return FALSE; + } + if ($current->isLink()) { + # ignore symlinks, original contents only + return FALSE; + } + # match **/*.html + return $current->isDir() + || preg_match('/(?getFilename()); + } + ); + } + + function files() + { + # order alphabetically by link + $dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator)); + array_walk($dir, function (&$row, $name) { + # prepare values for sorting (directory index first) + $row = preg_replace('{/index\.html$}', '', $name); + }); + asort($dir); + return $dir; + } +}