page: ignore asides in article body (cq title, teaser)
[minimedit.git] / article.inc.php
1 <?php
# Dutch month names keyed by month number (1-12).
# Index 0 only holds a placeholder so the numbering lines up.
global $monthname;
$monthname = [
	0  => '?',
	1  => 'januari',
	2  => 'februari',
	3  => 'maart',
	4  => 'april',
	5  => 'mei',
	6  => 'juni',
	7  => 'juli',
	8  => 'augustus',
	9  => 'september',
	10 => 'oktober',
	11 => 'november',
	12 => 'december',
];
7
/**
 * Format date components as a readable date using the global $monthname table.
 *
 * @param array $parts  list of [year, month, day, hour, minute, second];
 *                      trailing components may be left out (even all of them)
 * @return string  "day monthname year hour:minute" joined by spaces, with every
 *                 missing or zero component omitted; empty string if nothing remains
 */
function showdate($parts)
{
	global $monthname;
	$month = intval($parts[1] ?? 0);
	return implode(' ', array_filter([
		intval($parts[2] ?? 0),
		# months without a known name (zero or out of range) are left out
		$month > 0 ? ($monthname[$month] ?? '') : '',
		$parts[0] ?? '',
		# the time is only shown if specified up to and including seconds
		count($parts) > 5 ? "$parts[3]:$parts[4]" : '',
	]));
}
16
/**
 * A single page (html file) with lazily derived details.
 *
 * Values are requested as properties ($article->teaser); on first access the
 * method of the same name is run by __get() and its result is stored as a
 * property on the object, so later reads are plain property lookups.
 * This deliberately relies on dynamic properties, hence the attribute
 * (read as a regular comment by PHP versions before 8).
 */
#[\AllowDynamicProperties]
class ArchiveArticle
{
	public $raw, $title, $body; # file contents
	public $meta = []; # [property => content] of leading <meta> tags

	/**
	 * @param string $path  location of the html file, optionally prefixed by ./
	 */
	function __construct($path)
	{
		# page: file name without current directory prefix
		$this->page = preg_replace('{^\.(?:/|$)}', '', $path);
		# link: page without extension and without trailing (directory) index
		$this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page);
		$this->raw($this->page);
	}

	/**
	 * Read the file and split it into meta, title and body.
	 *
	 * Leaves all contents untouched (NULL) if the file is missing or unreadable.
	 *
	 * @param string $page  file name to load
	 */
	function raw($page)
	{
		if (!file_exists($page)) {
			return;
		}
		$contents = file_get_contents($page);
		if ($contents === FALSE) {
			# unreadable: treat like a missing file
			return;
		}
		$this->raw = $contents;

		if (preg_match_all('{
			\G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
		}x', $this->raw, $meta)) {
			# consecutive meta tags at the very start of the file
			$matchlen = array_sum(array_map('strlen', $meta[0]));
			$this->raw = substr($this->raw, $matchlen); # delete matched contents
			$this->meta = array_combine($meta[1], $meta[2]); # [property => content]
		}

		// find significant contents
		$this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
		if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
			# first h2 is the title, body is everything following it
			list (, $this->title, $this->body) = $titlematch;
		}
	}

	/**
	 * Resolve an unset property by calling the method of the same name.
	 *
	 * @param string $col  requested property, which must exist as a method
	 * @return mixed  the method result, also stored for subsequent reads
	 */
	function __get($col)
	{
		return $this->$col = $this->$col();  # run method and cache
	}

	/**
	 * Find the closest directory containing an index.php, walking up from link.
	 *
	 * Side effects: path is set to the part of link below the returned
	 * directory (or the entire link if none was found), and restricted to the
	 * last visited directory containing a .private file (FALSE if none).
	 *
	 * @return string|null  directory name, or nothing if no script applies
	 */
	function handler()
	{
		$path = $this->link;
		$this->path = '';
		$this->restricted = FALSE;
		while (TRUE) {
			if (file_exists("$path/.private")) {
				$this->restricted = $path;
			}

			if (file_exists("$path/index.php")) {
				return $path;
			}

			$up = strrpos($path, '/');
			if ($up === FALSE) {
				# top level reached: the remainder is not preceded by a slash
				$this->path = $path . $this->path;
				break;
			}
			$this->path = substr($path, $up) . $this->path;
			$path = substr($path, 0, $up);
		}
		return;
	}

	/**
	 * @return string  og:title metadata, otherwise the title without markup
	 */
	function safetitle()
	{
		# title remains NULL if the page has no heading
		return trim($this->meta['og:title'] ?? strip_tags((string) $this->title));
	}

	/**
	 * @return string  plain title, falling back to link if empty
	 */
	function name()
	{
		return $this->safetitle ?: $this->link;
	}

	/**
	 * @return int|false  modification time of the file, FALSE if unavailable
	 */
	function last()
	{
		if (!file_exists($this->page)) {
			return FALSE;
		}
		return filemtime($this->page);
	}

	/**
	 * @return string  modification time formatted as DATE_ATOM
	 */
	function lastiso()
	{
		return date(DATE_ATOM, $this->last);
	}

	/**
	 * Date components found in the page name (/yyyy/mm-dd- or /yyyy-mm-).
	 *
	 * @return array  [year, month] optionally followed by day; empty if undated
	 */
	function dateparts()
	{
		preg_match('< / (\d{4}) [/-] (\d{2}) (?:- (\d{2}) )? - >x', $this->page, $ymd);
		array_shift($ymd); # drop the full match, keep the captured numbers
		return $ymd;
	}

	/**
	 * @return string  date parts joined by dashes with a fixed time and offset
	 */
	function dateiso()
	{
		return implode('-', $this->dateparts) . 'T12:00:00+02:00';
	}

	/**
	 * @return string  date parts as formatted by showdate()
	 */
	function date()
	{
		return showdate($this->dateparts);
	}

	/**
	 * Introductory part of the body, up to the first image or horizontal rule.
	 *
	 * Side effect: img is set to the image tag if that was the divider.
	 *
	 * @return string|null  leading body, or all of it if undivided
	 */
	function story()
	{
		if ( preg_match('{
			(?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
		}x', (string) $this->body, $img, PREG_OFFSET_CAPTURE) ) {
			# strip part after matching divider (image)
			if (isset($img[1])) {
				$this->img = $img[1][0];
			}
			return substr($this->body, 0, $img[0][1]);
		}
		return $this->body;
	}

	/**
	 * @return string|null  og:description metadata, otherwise the contents of
	 *                      a paragraph at the start of the body, if any
	 */
	function teaser()
	{
		if ($override = $this->meta['og:description'] ?? NULL) {
			# prefer specific page description if found in metadata
			return $override;
		}

		# paragraph contents following the page header if any
		if (preg_match('{
			\G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
		}sx', (string) $this->body, $bodyp)) {
			return $bodyp[1];
		}
		return NULL;
	}

	/**
	 * @return string|null  image tag dividing the story, if any
	 */
	function img()
	{
		$this->img = NULL;
		$this->story; # sets img as a side effect unless determined before
		return $this->img;
	}

	/**
	 * @return string|null  og:image metadata, otherwise the src attribute
	 *                      of the story image
	 */
	function image()
	{
		if ($override = $this->meta['og:image'] ?? NULL) {
			# prefer specific page image if found in metadata
			return $override;
		}

		# read img by itself first: a ?? on it would skip the __get() lookup
		$tag = (string) $this->img;
		if ( preg_match('/\bsrc="([^"]*)"/', $tag, $src) ) {
			return $src[1];
		}
		return NULL;
	}

	/**
	 * Location of a resized version of the image.
	 *
	 * @param string $size  dimensions as the directory name following thumb/
	 * @return string|null  image prefixed by thumb/$size (replacing any
	 *                      existing /thumb/ prefix) without a .groot infix
	 *                      before the extension; nothing unless image starts
	 *                      with a slash
	 */
	function thumb($size = '300x')
	{
		if (!$this->image or $this->image[0] !== '/') return;
		return preg_replace(
			['{^(?:/thumb/[^/]*)?}', '/\.groot(?=\.\w+$)/'], ["thumb/$size", ''],
			$this->image
		);
	}
}
168
/**
 * Recursive lookup of html pages below a directory.
 */
class PageSearch
{
	public $iterator; # filtered directory tree, set up by the constructor

	/**
	 * @param string $path  directory to start searching from
	 */
	function __construct($path = '.')
	{
		$this->iterator = new RecursiveCallbackFilterIterator(
			new RecursiveDirectoryIterator($path),
			function ($current) {
				if ($current->getFilename()[0] === '.') {
					# skip hidden files and directories
					return FALSE;
				}
				if ($current->isLink()) {
					# ignore symlinks, original contents only
					return FALSE;
				}
				if ($current->isDir()) {
					# traverse subdirectories unless untracked in any amount
					return !file_exists("$current/.gitignore");
				}
				# match **/*.html (except *.inc.html)
				return preg_match('/(?<!\.inc)\.html$/', $current->getFilename()) === 1;
			}
		);
	}

	/**
	 * List all matching files.
	 *
	 * @return array  [file path => sort key] in ascending order of sort key,
	 *                which is the path with a trailing /index.html removed
	 */
	function files()
	{
		# order alphabetically by link
		$dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator));
		array_walk($dir, function (&$row, $name) {
			# prepare values for sorting (directory index first)
			$row = preg_replace('{/index\.html$}', '', $name);
		});
		asort($dir);
		return $dir;
	}
}