-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpopulateLinkDesc.php
More file actions
75 lines (71 loc) · 2.08 KB
/
populateLinkDesc.php
File metadata and controls
75 lines (71 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
/**
* To the extent possible under law, I, Mark Hershberger, have waived all copyright and
* related or neighboring rights to Hello World. This work is published from the
* United States.
*
* @copyright CC0 http://creativecommons.org/publicdomain/zero/1.0/
* @author Mark A. Hershberger <mah@everybody.org>
* @ingroup Maintenance
*/
require_once "../../maintenance/Maintenance.php";
/**
 * Maintenance script that fills the link_desc table from externallinks.
 *
 * It runs two passes:
 *  1. For every distinct external link target (el_to) found on a page in
 *     namespace 0, insert a link_desc row if none exists for that URL.
 *  2. For every link_desc row whose ld_desc is NULL, fetch the URL and store
 *     the text of the document's first <title> element as the description,
 *     falling back to the URL itself when no usable title can be obtained.
 */
class PopulateLinkDesc extends Maintenance {
	/**
	 * Script entry point: runs the row-creation pass followed by the
	 * description-fetching pass.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );

		$this->insertMissingLinkDescRows( $dbr, $dbw );
		$this->populateMissingDescriptions( $dbr, $dbw );
	}

	/**
	 * Insert a link_desc row for each distinct main-namespace external link
	 * URL that does not have one yet.
	 *
	 * @param object $dbr Handle used for reads (from wfGetDB( DB_SLAVE ))
	 * @param object $dbw Handle used for writes (from wfGetDB( DB_MASTER ))
	 */
	private function insertMissingLinkDescRows( $dbr, $dbw ) {
		echo "Adding link_desc rows for externallinks rows lacking them...\n";

		// Let the database de-duplicate the URLs instead of doing a quadratic,
		// loosely-compared in_array() scan in PHP.
		$res = $dbr->select(
			array( 'externallinks', 'page' ),
			'el_to',
			array( 'el_from=page_id', 'page_namespace=0' ),
			__METHOD__,
			array( 'DISTINCT' )
		);

		// Drain the result set before issuing further queries, as the
		// original implementation did.
		$urls = array();
		foreach ( $res as $row ) {
			$urls[] = $row->el_to;
		}

		foreach ( $urls as $url ) {
			$existing = $dbr->selectRow(
				'link_desc',
				'ld_url',
				array( 'ld_url' => $url ),
				__METHOD__
			);
			if ( !$existing ) {
				echo " $url ...\n";
				$dbw->insert( 'link_desc', array( 'ld_url' => $url ), __METHOD__ );
			}
		}
	}

	/**
	 * Fetch a title for every link_desc row whose ld_desc is NULL and store
	 * it. When no title can be retrieved the URL itself is stored, so the
	 * row no longer matches the ld_desc IS NULL condition on later runs.
	 *
	 * @param object $dbr Handle used for reads (from wfGetDB( DB_SLAVE ))
	 * @param object $dbw Handle used for writes (from wfGetDB( DB_MASTER ))
	 */
	private function populateMissingDescriptions( $dbr, $dbw ) {
		echo "Adding descriptions for link_desc rows lacking them...\n";

		$res = $dbr->select(
			'link_desc',
			array( 'ld_id', 'ld_url', 'ld_desc' ),
			array( 'ld_desc' => null ),
			__METHOD__
		);

		foreach ( $res as $row ) {
			echo " " . $row->ld_url . "\n";

			$title = $this->fetchPageTitle( $row->ld_url );
			if ( $title !== null ) {
				echo " $title\n";
			} else {
				echo " Unable to retrieve\n";
				$title = $row->ld_url;
			}

			$dbw->update(
				'link_desc',
				array( 'ld_desc' => $title ),
				array( 'ld_id' => $row->ld_id ),
				__METHOD__
			);
		}
	}

	/**
	 * Load the document at $url and return the trimmed text of its first
	 * <title> element.
	 *
	 * @param string $url Location handed to DOMDocument::loadHTMLFile()
	 * @return string|null The title, or null when the URL is empty, the
	 *  document cannot be loaded, it has no <title>, or the title is blank
	 */
	private function fetchPageTitle( $url ) {
		if ( (string)$url === '' ) {
			// Nothing to load; newer PHP versions reject an empty path outright.
			return null;
		}

		$doc = new DOMDocument();
		// Fetching arbitrary remote pages is best-effort: unreachable hosts and
		// malformed markup are expected, so the warnings are deliberately
		// silenced and failure is detected through the return value instead.
		if ( !@$doc->loadHTMLFile( $url ) ) {
			return null;
		}

		$xpath = new DOMXPath( $doc );
		$node = $xpath->query( '//title' )->item( 0 );
		if ( !$node ) {
			return null;
		}

		$title = trim( $node->nodeValue );
		return $title === '' ? null : $title;
	}
}
// Name of the Maintenance subclass defined in this file.
// NOTE(review): presumably read by the file included below to decide which
// class to instantiate — confirm against Maintenance.php.
$maintClass = 'PopulateLinkDesc';
// Include the file named by the RUN_MAINTENANCE_IF_MAIN constant (not defined
// in this file; presumably provided by the Maintenance.php include above).
require_once RUN_MAINTENANCE_IF_MAIN;