1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310: 311: 312: 313: 314: 315: 316: 317: 318: 319: 320: 321: 322: 323: 324: 325: 326: 327: 328: 329: 330: 331: 332: 333: 334: 335: 336: 337: 338: 339: 340: 341: 342: 343: 344: 345: 346: 347: 348: 349: 350: 351: 352: 353: 354: 355: 356: 357: 358: 359: 360: 361: 362: 363: 364: 365: 366: 367: 368: 369: 370: 371: 372: 373: 374: 375: 376: 377: 378: 379: 380: 381: 382: 383: 384: 385: 386: 387: 388: 389: 390: 391: 392: 393: 394: 395: 396: 397: 398: 399: 400: 401: 402: 403: 404: 405: 406: 407: 408: 409: 410: 411: 412: 413: 414: 415: 416: 417: 418: 419: 420: 421: 
422: 423: 424: 425: 426: 427:
<?php
/**
* A daemon process that will import and keep bills up to date
*
* @package LegiScan\Utility
* @author LegiScan API Team <api@legiscan.com>
* @license https://opensource.org/licenses/BSD-2-Clause
* @copyright 2010-2020 LegiScan LLC
* @link https://legiscan.com/datasets
*
*/
// Refuse to run on interpreters older than the minimum supported release
if (version_compare(PHP_VERSION, '5.4.0', '<'))
{
	die('PHP 5.4.0 or higher is required');
}
// Include the LegiScan API Client
require_once('LegiScan.php');
// {{{ LegiScan_Worker Class
/**
* A daemon process that will import and keep bills up to date
*
* The `legiscand.php` script provides a daemon process that can use four
* different methods, controlled by the `update_type` setting, to keep a
* local database synchronized via LegiScan Pull API.
*
* ## Monitor
* This mode will use the `ls_monitor` table to keep a specific `bill_id`
* list updated. This list can be managed with `legiscan-cli.php`, though
* determining the `bill_id` is an exercise for the reader and would
* require additional scripting, though a likely source would be through
* the search engine.
*
* <code>
* php legiscan-cli.php --monitor 823882
* php legiscan-cli.php --unmonitor 823882
* </code>
*
* ## State
* This mode will synchronize the entire master list from one or more
* states. To set the state list edit `config.php` and add each state
* to the `states[]` setting.
*
* For example to track all legislation in California and US Congress:
*
* <code>
* states[] = CA
* states[] = US
* </code>
*
* __NOTE__: It is *HIGHLY* recommended to pre-load the current state
* [datasets](https://api.legiscan.com/dl) with `legiscan-bulk.php` prior
* to the first run to minimize the on-boarding queries.
*
* ## Search (National)
* This mode will synchronize the results of searches run against the
* national database. To specify the searches edit the `config.php`
* and add each search to the `searches[]` setting.
*
* The searches will also be filtered by the global `relevance` cutoff
* setting, which can be overridden on a per search basis by prepending
* a different score and the pipe `|` character. In addition, a `state`
* abbreviation can also be prefixed to override either national or
* state search. When used with a `relevance` override the `state`
* should appear first separated by a comma `,`.
*
* Also notice that the entire search string should be quoted, and any
* internal quotes should be escaped as `\"`.
*
* <code>
* searches[] = "gender AND bathroom"
* searches[] = "\"national popular vote\""
* searches[] = "42|hemp OR cannabis OR marijuana"
* searches[] = "NY|charter ADJ schools"
* searches[] = "CA,60|vaccination AND status:passed"
* </code>
*
* ## State Search
* This mode combines both of the other methods such that the
* `searches[]` are only run against the `states[]` list,
* unless a search specific `state` is used.
*
* @see LegiScan_Process
* @see LegiScan_Pull
* @link https://api.legiscan.com/dl/
*
*/
class LegiScan_Worker
{
	/**
	 * Worker loop that generates a bill_id list then imports / updates
	 * via {@link LegiScan_Pull::importBillList}
	 *
	 * Each pass reads the configured `update_type` (`monitored`, `state`,
	 * `search` or `state_search`), compares the remote `change_hash` of every
	 * candidate bill against the `ls_bill` table and queues bills that are
	 * missing or changed (and not listed in `ls_ignore` when
	 * `use_ignore_table` is set) through {@link LegiScan_Process::request}.
	 *
	 * Invalid configuration, or any exception raised during a pass, is echoed,
	 * mailed with {@link LegiScan::sendMail} and ends the process via `exit(1)`.
	 *
	 * @param integer $daemon
	 *   If non-zero the worker will loop forever, sleeping `interval` seconds
	 *   (clamped to 3600..86400) between passes; otherwise a single pass runs
	 *
	 */
	public function worker($daemon)
	{
		try {
			// Create an instance to generate pull requests
			$legiscan = new LegiScan_Pull();

			// Create an instance to write to database
			$logic = new LegiScan_Process();

			// Grab a handle to the database
			$db = $logic->getDB();

			// {{{ Check and validate config
			$error_msg = '';

			$update_type = strtolower((string) LegiScan::getConfig('update_type'));
			// Missing settings become empty lists so in_array()/foreach never see null
			$states = $this->toList(LegiScan::getConfig('states'));
			$searches = $this->toList(LegiScan::getConfig('searches'));
			$interval = (int) LegiScan::getConfig('interval', 3600);
			$default_relevance = (int) LegiScan::getConfig('relevance', 50);
			$ignore_table = (bool) LegiScan::getConfig('use_ignore_table');

			$valid_types = array('monitored','state','search','state_search');

			if (!$update_type)
				$error_msg .= "Configuration value update_type is missing\n";
			elseif (!in_array($update_type, $valid_types))
				$error_msg .= "Invalid configuration value for update_type $update_type\n";

			if (!$default_relevance || !($default_relevance >= 0 && $default_relevance <= 100))
				$error_msg .= "Invalid configuration value for default_relevance $default_relevance\n";

			// At some point it would be better to run from cron...
			if ($interval < 3600) $interval = 3600;
			if ($interval > 86400) $interval = 86400;

			if ($error_msg)
			{
				$msg = "Invalid Configuration\n\n$error_msg\nExiting\n";
				echo $msg;
				LegiScan::sendMail("LegiScan Daemon Error", $msg);
				exit(1);
			}
			// }}}

			do
			{
				// Reset the missing list and checked count each loop
				$logic->resetMissing();
				$checked = 0;

				// Build ignore set every run, keyed by bill_id for constant time lookups
				$ignore = array();
				if ($ignore_table)
				{
					$stmt = $db->prepare("SELECT bill_id FROM ls_ignore");
					$stmt->execute();
					while ($r = $stmt->fetch())
					{
						$ignore[$r['bill_id']] = true;
					}
				}

				LegiScan::fileLog("LegiScanD starting $update_type update run");

				// Prepared once per run and re-executed for every candidate bill
				$sql = "SELECT bill_id
						FROM ls_bill
						WHERE bill_id = :bill_id AND change_hash = :change_hash";
				$hash_stmt = $db->prepare($sql);

				// {{{ Make a bill_id request list
				switch ($update_type)
				{
					// Specific bill_id list from ls_monitor table
					case 'monitored':
						$monitor_list = array();

						// Use state_abbr to tie getMasterListRaw to "current" session to
						// handle unmanaged ls_monitor entries for past sessions
						$sql = "SELECT m.bill_id, s.state_abbr
								FROM ls_monitor m
								INNER JOIN ls_bill b ON m.bill_id = b.bill_id
								INNER JOIN ls_state s ON b.state_id = s.state_id
								ORDER BY s.state_id, m.bill_id";
						$rs = $db->query($sql);
						while ($r = $rs->fetch())
						{
							$monitor_list[$r['state_abbr']][$r['bill_id']] = 1;
						}

						foreach (array_keys($monitor_list) as $state)
						{
							// Get current master list for $state
							$resp = $legiscan->getMasterListRaw($state);

							if ($resp['status'] == LegiScan::API_OK)
							{
								// Drop the leading element so only bill rows are iterated
								// (presumably the session record; the value itself is unused)
								array_shift($resp['masterlist']);

								foreach ($resp['masterlist'] as $bill)
								{
									// Compare master list to monitor list
									if (isset($monitor_list[$state][$bill['bill_id']]))
									{
										$checked++;
										$this->requestIfChanged($logic, $hash_stmt, $ignore, $bill['bill_id'], $bill['change_hash']);
									}
								}
							}
						}
						break;

					// State full replication
					case 'state':
						if (in_array('all', $states, true))
						{
							$states = array();
							$state_list = $logic->getStateList();
							foreach ($state_list as $state)
								$states[] = $state['state_abbr'];
						}

						foreach ($states as $state)
						{
							// Normally this would be a session_id, however this short cut
							// will mean the system always tracks "current" session
							$resp = $legiscan->getMasterListRaw($state);

							if ($resp['status'] == LegiScan::API_OK)
							{
								// Drop the leading element so only bill rows are iterated
								// (presumably the session record; the value itself is unused)
								array_shift($resp['masterlist']);

								foreach ($resp['masterlist'] as $bill)
								{
									$checked++;
									$this->requestIfChanged($logic, $hash_stmt, $ignore, $bill['bill_id'], $bill['change_hash']);
								}
							}
						}
						break;

					// National searches
					case 'search':
						// To avoid largely duplicating code we reset the states
						// array to ALL for update_type=search and fall through
						// to the state_search code
						$states = array('ALL');
						// NOTE FALL THROUGH NO BREAK

					// State searches
					case 'state_search':
						foreach ($states as $state)
						{
							foreach ($searches as $search)
							{
								$relevance = $default_relevance;
								$exhausted = false;
								$page = 1;

								// Every search starts from the outer loop state; a per-search
								// override is kept in its own variable so it cannot leak into
								// the searches that follow it
								$search_state = $state;

								// Never trust whitespace
								$search = trim($search);

								// Check for a state override and tidy up search string
								if (preg_match('#^([A-Z]{2})(\s*,\s*\d+)?\s*(\|.+)#i', $search, $m))
								{
									$search_state = strtoupper($m[1]);
									$search = ltrim(ltrim(str_replace(' ', '', $m[2]), ',') . $m[3], '|');
								}

								// Check for a relevance override and tidy up search string
								if (preg_match('#^(\d+)\s*\|\s*(.*)#', $search, $m))
								{
									$relevance = (int) $m[1];
									$search = $m[2];
								}

								// Drop any remaining whitespace
								$search = trim($search);

								do
								{
									// Use searchRaw to get 2000 results at a time since we only care
									// about relevance, bill_id and change_hash
									$params = array(
										'state' => $search_state,
										'query' => $search,
										'page' => $page,
									);
									$resp = $legiscan->getSearchRaw($params);

									if ($resp['status'] == LegiScan::API_OK && $resp['searchresult']['summary']['count'] > 0)
									{
										$summary = $resp['searchresult']['summary'];

										foreach ($resp['searchresult']['results'] as $result)
										{
											if ($result['relevance'] > $relevance)
											{
												$checked++;
												$this->requestIfChanged($logic, $hash_stmt, $ignore, $result['bill_id'], $result['change_hash']);
											}
											else
											{
												// At or below the cutoff, no further pages are fetched
												$exhausted = true;
											}
										}

										// More pages or exhausted?
										if ($summary['page_total'] > $page)
											$page++;
										else
											$exhausted = true;
									}
									else
									{
										// Bad status or 0 count in results
										$exhausted = true;
									}
								} while (!$exhausted);
							}
						}
						break;
				}
				// }}}

				// Did we find any bills that were missing / changed
				$missing = $logic->getMissing();

				$cnt = isset($missing['bills']) ? count($missing['bills']) : 0;
				LegiScan::fileLog("LegiScanD found $cnt / $checked bills to process");

				// Do the thing!
				if (!empty($missing['bills']))
				{
					$legiscan->importBillList($missing['bills'], $logic);
					LegiScan::fileLog("LegiScanD processing complete");
				}

				// From the public pull perspective there are intrinsic cache delays
				// so we take a nice long nap until its time to make the donuts again
				if ($daemon)
					sleep($interval);

			} while ($daemon);

		// The specific API exceptions are listed ahead of APIException so they
		// stay reachable should they derive from it (hierarchy is declared elsewhere)
		} catch (APIAccessException $e) {
			$this->fail('API Access', $e);
		} catch (APIStatusException $e) {
			$this->fail('API Status', $e);
		} catch (APIException $e) {
			$this->fail('API Error', $e);
		} catch (PDOException $e) {
			$this->fail('Database Error', $e);
		} catch (Exception $e) {
			$this->fail('LegiScan Error', $e);
		}
	}

	/**
	 * Queue a bill for import unless the stored copy already carries the
	 * given change_hash or the bill is on the ignore set
	 *
	 * @param LegiScan_Process $logic
	 *   Receives the `request('bills', $bill_id)` call
	 *
	 * @param PDOStatement $stmt
	 *   Prepared lookup with `:bill_id` and `:change_hash` placeholders
	 *
	 * @param array $ignore
	 *   Ignore set keyed by bill_id
	 *
	 * @param integer $bill_id
	 *   Bill being checked
	 *
	 * @param string $change_hash
	 *   Remote change hash to compare with the stored row
	 *
	 */
	private function requestIfChanged($logic, $stmt, array $ignore, $bill_id, $change_hash)
	{
		$stmt->bindValue(':bill_id', $bill_id, PDO::PARAM_INT);
		$stmt->bindValue(':change_hash', $change_hash, PDO::PARAM_STR);
		$stmt->execute();
		$exists = $stmt->fetchColumn();
		// Release the result set so the statement can be executed again
		$stmt->closeCursor();

		if (!$exists && !isset($ignore[$bill_id]))
		{
			$logic->request('bills', $bill_id);
		}
	}

	/**
	 * Normalize a configuration value into a list
	 *
	 * @param mixed $value
	 *   Raw configuration value
	 *
	 * @return array
	 *   The value itself when already an array, an empty array for
	 *   null / false / empty string, otherwise a one element array
	 *
	 */
	private function toList($value)
	{
		if (is_array($value))
			return $value;

		if ($value === null || $value === false || $value === '')
			return array();

		return array($value);
	}

	/**
	 * Report a fatal exception on stdout and by mail, then exit with status 1
	 *
	 * @param string $label
	 *   Prefix identifying the error category
	 *
	 * @param Exception $e
	 *   The exception being reported
	 *
	 */
	private function fail($label, $e)
	{
		$msg = $label . ': ' . $e->getMessage() . ' in ' . basename($e->getFile()) . ' on line ' . $e->getLine() . "\n";
		echo $msg;
		LegiScan::sendMail('LegiScan Daemon Error', $msg);
		exit(1);
	}
}
// }}}
// Command line switches understood by this script
$options = array('daemon');
legiscan_getopt($options);

// With --daemon the worker keeps looping (any non-zero value will do);
// without it a single update pass is made
$daemon = legiscan_option('daemon') ? 42 : 0;

$worker = new LegiScan_Worker();
$worker->worker($daemon);