X-Git-Url: http://git.shiar.nl/minimedit.git/blobdiff_plain/f0a0679f7ec71d6ae4969d39da4d6be84147fcb4..bd940f48a9b1c6fb45aef4860d3f23b8094d5ba4:/article.inc.php

diff --git a/article.inc.php b/article.inc.php
index e3a3ee8..93bbe3b 100644
--- a/article.inc.php
+++ b/article.inc.php
@@ -16,16 +16,35 @@ function showdate($parts)
 
 class ArchiveArticle
 {
-	public $raw, $preface, $title, $body;
+	public $raw, $title, $body; # file contents
+	public $meta = [];  # head metadata properties
 
 	function __construct($path)
 	{
-		$this->page = $path;
-		$this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
-		if (file_exists($this->page)) {
-			$this->raw = file_get_contents($this->page);
-			@list ($this->preface, $this->title, $this->body) =
-				preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+		$this->page = preg_replace('{^\.(?:/|$)}', '', $path);  # drop a leading "./" (or a lone ".")
+		$this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page);  # page minus ".html" and a final "index" name
+		$this->raw($this->page);  # read the file and fill raw, meta, title and body
+	}
+
+	function raw($page)  # load $page into the raw, meta, title and body properties
+	{
+		if (!file_exists($page)) {
+			return;  # nothing to read: leave the properties untouched
+		}
+		$this->raw = file_get_contents($page);
+
+		if (preg_match_all('{
+			\G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+		}x', $this->raw, $meta)) {
+			$matchlen = array_sum(array_map('strlen', $meta[0]));  # combined length of the matched <meta> tags
+			$this->raw = substr($this->raw, $matchlen); # delete matched contents (one run at the very start, as \G chains the matches)
+			$this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+		}
+
+		// find significant contents
+		$this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);  # body leaves out <aside> elements
+		if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+			list (, $this->title, $this->body) = $titlematch;  # first <h2> becomes the title, the text after it the body
 		}
 	}
 
@@ -34,9 +53,33 @@ class ArchiveArticle
 		return $this->$col = $this->$col();  # run method and cache
 	}
 
+	function handler()  # find the directory at or above link that has an index.php
+	{
+		$path = $this->link;
+		$this->path = '';  # collects the segments of link below the directory being tested
+		$this->restricted = FALSE;  # becomes the last tested directory found to contain .private
+		while (TRUE) {
+			if (file_exists("$path/.private")) {
+				$this->restricted = $path;
+			}
+
+			if (file_exists("$path/index.php")) {
+				return $path;
+			}
+
+			$up = strrpos($path, '/');  # offset of the last slash, FALSE if there is none
+			$this->path = substr($path, $up) . $this->path;  # move the final segment over to $this->path
+			$path = substr($path, 0, $up);
+			if ($up === FALSE) {
+				break;  # no parent directory left to test
+			}
+		}
+		return;  # no handler found
+	}
+
 	function safetitle()
 	{
-		return trim(strip_tags($this->title));
+		return trim($this->meta['og:title'] ?? strip_tags($this->title));  # og:title metadata takes precedence over the tag-stripped title
 	}
 	function name()
 	{
@@ -70,7 +113,7 @@ class ArchiveArticle
 	function story()
 	{
 		if ( preg_match('{
-			\n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
+			(?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
 		}x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
 			# strip part after matching divider (image)
 			if (isset($img[1])) {
@@ -80,22 +123,19 @@ class ArchiveArticle
 		}
 		return $this->body;
 	}
+
 	function teaser()
 	{
-		if (preg_match('{
-			<meta \s+ name="description" [^>]* content="([^">]*)"
-		}x', $this->preface, $meta)) {
-			# prefer specific page description if found (assume before title)
-			#TODO: strip from body contents
-			return $meta[1];
+		if ($override = @$this->meta['og:description']) {  # @ silences the notice for a missing key; empty values fall through
+			# prefer specific page description if found in metadata
+			return $override;
 		}
+
+		# paragraph contents following the page header if any (\G ties the match to the very start of body)
 		if (preg_match('{
-			</h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> (.*?) </p>
-		}sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
-			# fallback paragraph contents following the page header
-			if ($bodyp[1][1] < 256) {
-				return $bodyp[1][0];
-			}
+			\G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+		}sx', $this->body, $bodyp, 0)) {
+			return $bodyp[1];  # inner contents of a <p> preceded only by whitespace, <div> tags or [[...]] blocks
 		}
 	}
 
@@ -107,6 +147,11 @@ class ArchiveArticle
 	}
 	function image()
 	{
+		if ($override = @$this->meta['og:image']) {
+			# prefer specific page image if found in metadata
+			return $override;
+		}
+
 		if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
 			return $src[1];
 		}
@@ -120,3 +165,41 @@ class ArchiveArticle
 		);
 	}
 }
+
+class PageSearch  # recursive search for *.html page files below a directory
+{
+	function __construct($path = '.')
+	{
+		$this->iterator = new RecursiveCallbackFilterIterator(
+			new RecursiveDirectoryIterator($path),
+			function ($current) {  # truthy result accepts a file or descends into a directory
+				if ($current->getFilename()[0] === '.') {
+					# skip hidden files and directories
+					return FALSE;
+				}
+				if ($current->isLink()) {
+					# ignore symlinks, original contents only
+					return FALSE;
+				}
+				if ($current->isDir()) {
+					# traverse subdirectories unless they hold a .gitignore (untracked in any amount)
+					return !file_exists("$current/.gitignore");
+				}
+				# match **/*.html but not *.inc.html
+				return preg_match('/(?<!\.inc)\.html$/', $current->getFilename());
+			}
+		);
+	}
+
+	function files()  # array of links keyed as yielded by the iterator (presumably file paths)
+	{
+		# order alphabetically by link
+		$dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator));
+		array_walk($dir, function (&$row, $name) {
+			# prepare values for sorting (directory index first)
+			$row = preg_replace('{/index\.html$}', '', $name);
+		});
+		asort($dir);  # sort by value while keeping the keys
+		return $dir;
+	}
+}