issue: match image replies for metadata
[minimedit.git] / article.inc.php
1 <?php
global $monthname;
# Dutch month names keyed by calendar month number (1-12).
# Slot 0 is only a placeholder so that the keys line up with month numbers.
$monthname = [
	0  => '?',
	1  => 'januari',
	2  => 'februari',
	3  => 'maart',
	4  => 'april',
	5  => 'mei',
	6  => 'juni',
	7  => 'juli',
	8  => 'augustus',
	9  => 'september',
	10 => 'oktober',
	11 => 'november',
	12 => 'december',
];
7
/**
 * Format date parts as a human readable "day month year [hours:minutes]" string.
 *
 * Empty pieces are left out, so a list with only year and month yields
 * "month year" and an empty list yields an empty string.
 *
 * @param array $parts  list of [year, month, day, hours, minutes, ...];
 *                      every entry is optional, the time is only shown
 *                      when more than five parts are given
 * @return string  space separated date, using the names in global $monthname
 */
function showdate($parts)
{
	global $monthname;

	# missing entries fall back to values that array_filter() drops below
	$year  = $parts[0] ?? '';
	$month = $parts[1] ?? 0;
	$day   = intval($parts[2] ?? 0);

	return implode(' ', array_filter([
		$day,
		# unknown month numbers are omitted instead of raising a warning
		$month > 0 ? ($monthname[intval($month)] ?? '') : '',
		$year,
		count($parts) > 5 ? "$parts[3]:$parts[4]" : '',
	]));
}
16
/**
 * A single HTML page on disk, with accessors for its metadata and content.
 *
 * The constructor reads the file and splits it into leading <meta> tags,
 * the text before the first <h2> (preface), the <h2> contents (title) and
 * everything after it (body).
 *
 * Every method without required arguments can also be read as a property
 * (e.g. $article->teaser): __get() runs the method of that name once and
 * stores the result as a property of the same name, so later reads do not
 * run it again.  Those cached values, as well as the side products of
 * handler() (path, restricted) and story() (img), are dynamic properties;
 * the attribute below keeps that working without deprecation notices on
 * PHP 8.2+ (older versions parse the line as a comment).
 */
#[\AllowDynamicProperties]
class ArchiveArticle
{
	public $raw, $preface, $title, $body;
	public $meta = [];
	# file location, and the same without (/index).html suffix
	public $page, $link;

	/**
	 * @param string $path  location of the html file, optionally prefixed by ./
	 */
	function __construct($path)
	{
		$this->page = preg_replace('{^\.(?:/|$)}', '', $path);
		$this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page);
		$this->raw($this->page);
	}

	/**
	 * Load a file into raw, meta, preface, title and body.
	 *
	 * Nothing is set if the file does not exist.
	 *
	 * @param string $page  location of the file to read
	 */
	function raw($page)
	{
		if (!file_exists($page)) {
			return;
		}
		$this->raw = file_get_contents($page);

		# consecutive meta tags at the very start (\G) are metadata, not content
		if (preg_match_all('{
			\G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
		}x', $this->raw, $meta)) {
			$matchlen = array_sum(array_map('strlen', $meta[0]));
			$this->raw = substr($this->raw, $matchlen); # delete matched contents
			$this->meta = array_combine($meta[1], $meta[2]); # [property => content]
		}

		# split at the first header; parts after a missing header remain NULL
		$parts = preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
		list ($this->preface, $this->title, $this->body) = array_pad($parts ?: [], 3, NULL);
	}

	/**
	 * Resolve an unset property by calling the method of the same name.
	 *
	 * @param string $col  property name, which must match an existing method
	 * @return mixed  the method result, also stored as property $col
	 */
	function __get($col)
	{
		return $this->$col = $this->$col();  # run method and cache
	}

	/**
	 * Find the closest directory at or above the link containing an index.php.
	 *
	 * As side effects this sets
	 * - path: the part of the link which was stripped to get to the
	 *   directory where the search stopped, and
	 * - restricted: the last visited (highest) directory containing
	 *   a .private file, or FALSE if none did.
	 *
	 * @return string|null  directory of the script, or NULL if none was found
	 */
	function handler()
	{
		$path = $this->link;
		$this->path = '';
		$this->restricted = FALSE;
		while (TRUE) {
			if (file_exists("$path/.private")) {
				$this->restricted = $path;
			}

			if (file_exists("$path/index.php")) {
				return $path;
			}

			$up = strrpos($path, '/');
			if ($up === FALSE) {
				# no parent left: the remainder is part of the unhandled path
				$this->path = $path . $this->path;
				break;
			}
			$this->path = substr($path, $up) . $this->path;
			$path = substr($path, 0, $up);
		}
		return;
	}

	/**
	 * @return string  og:title metadata if present, otherwise the header
	 *                 without tags; empty if the page has neither
	 */
	function safetitle()
	{
		return trim($this->meta['og:title'] ?? strip_tags($this->title ?? ''));
	}

	/**
	 * @return string  the safe title, or the link if that is empty
	 */
	function name()
	{
		return $this->safetitle ?: $this->link;
	}

	/**
	 * @return int|false  modification time of the file as reported by filemtime()
	 */
	function last()
	{
		return filemtime($this->page);
	}

	/**
	 * @return string  modification time formatted as DATE_ATOM
	 */
	function lastiso()
	{
		return date(DATE_ATOM, $this->last);
	}

	/**
	 * Extract a date from a /yyyy-mm-dd- or /yyyy/mm-dd- part of the file name.
	 *
	 * @return array  [year, month] or [year, month, day] as matched digits,
	 *                or an empty list if the name contains no date
	 */
	function dateparts()
	{
		preg_match('< / (\d{4}) [/-] (\d{2}) (?:- (\d{2}) )? - >x', $this->page, $ymd);
		array_shift($ymd);
		return $ymd;
	}

	/**
	 * @return string  the date parts joined by dashes, followed by a fixed
	 *                 time of noon at a +02:00 offset
	 */
	function dateiso()
	{
		# cached property, like date() does, instead of matching again
		return implode('-', $this->dateparts) . 'T12:00:00+02:00';
	}

	/**
	 * @return string  the date parts as formatted by showdate()
	 */
	function date()
	{
		return showdate($this->dateparts);
	}

	/**
	 * Leading part of the body, up to the first image or horizontal rule.
	 *
	 * If the divider is an image, its tag is stored in the img property.
	 *
	 * @return string|null  the body before the divider, or the entire body
	 *                      (NULL for pages without header) if there is none
	 */
	function story()
	{
		$body = (string) $this->body;
		if ( preg_match('{
			(?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
		}x', $body, $img, PREG_OFFSET_CAPTURE) ) {
			# strip part after matching divider (image)
			if (isset($img[1])) {
				$this->img = $img[1][0];
			}
			return substr($body, 0, $img[0][1]);
		}
		return $this->body;
	}

	/**
	 * Short description of the page.
	 *
	 * @return string|null  og:description metadata if present, otherwise
	 *                      the contents of the first paragraph when it starts
	 *                      near the top of the page; NULL if neither applies
	 */
	function teaser()
	{
		if ($override = $this->meta['og:description'] ?? NULL) {
			# prefer specific page description if found in metadata
			return $override;
		}

		$raw = (string) $this->raw;

		if (preg_match('{
			</h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
		}sx', $raw, $bodyp, PREG_OFFSET_CAPTURE)) {
			# fallback paragraph contents following the page header
			if ($bodyp[1][1] < 256) {
				return $bodyp[1][0];
			}
		}

		# starting paragraph for documents without title (assumed simple/partial)
		if (strpos($raw, '<h2') === FALSE and preg_match('{
			\A (?: <div [^>]*> \s* )* <p> \s* (.*?) </p>
		}sx', $raw, $bodyp)) {
			return $bodyp[1];
		}
	}

	/**
	 * @return string|null  the image tag dividing the story, if any
	 */
	function img()
	{
		$this->img = NULL;
		$this->story;  # sets img as a side effect if it finds an image
		return $this->img;
	}

	/**
	 * @return string|null  og:image metadata if present, otherwise the src
	 *                      attribute of the story image; NULL without either
	 */
	function image()
	{
		if ($override = $this->meta['og:image'] ?? NULL) {
			# prefer specific page image if found in metadata
			return $override;
		}

		if ( preg_match('/\bsrc="([^"]*)"/', (string) $this->img, $src) ) {
			return $src[1];
		}
	}

	/**
	 * Location of a resized version of the page image.
	 *
	 * @param string $size  directory name below thumb/ to link to
	 * @return string|null  thumb/$size followed by the image location
	 *                      (replacing any existing /thumb/* prefix, and
	 *                      without a .groot part before the extension),
	 *                      or NULL if the image does not start with a slash
	 */
	function thumb($size = '300x')
	{
		if (!$this->image or $this->image[0] !== '/') return;
		return preg_replace(
			['{^(?:/thumb/[^/]*)?}', '/\.groot(?=\.\w+$)/'], ["thumb/$size", ''],
			$this->image
		);
	}
}
174
/**
 * Recursive search for html pages below a directory.
 */
class PageSearch
{
	# filtered directory iterator set up by the constructor
	public $iterator;

	/**
	 * @param string $path  directory to search
	 */
	function __construct($path = '.')
	{
		$this->iterator = new RecursiveCallbackFilterIterator(
			new RecursiveDirectoryIterator($path),
			# static: the filter does not need the object, so do not bind it
			static function ($current) {
				if ($current->getFilename()[0] === '.') {
					# skip hidden files and directories
					return FALSE;
				}
				if ($current->isLink()) {
					# ignore symlinks, original contents only
					return FALSE;
				}
				if ($current->isDir()) {
					# traverse subdirectories unless untracked in any amount
					return !file_exists("$current/.gitignore");
				}
				# match **/*.html except for *.inc.html
				return (bool) preg_match('/(?<!\.inc)\.html$/', $current->getFilename());
			}
		);
	}

	/**
	 * List all matching files.
	 *
	 * @return array  the file name without any /index.html suffix for each
	 *                file location found, ordered by those values
	 */
	function files()
	{
		$dir = [];
		foreach (new RecursiveIteratorIterator($this->iterator) as $name => $file) {
			# prepare values for sorting (directory index first)
			$dir[$name] = preg_replace('{/index\.html$}', '', $name);
		}
		# order alphabetically by link
		asort($dir);
		return $dir;
	}
}