diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c85e87f --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +scraper/config.php +scraper/scraper.mysql.cnf +scraper/target diff --git a/README.html b/README.html index e93be66..7555f76 100644 --- a/README.html +++ b/README.html @@ -32,7 +32,7 @@

Detailed Setup

"| /home/myusername/path/to/decrypt_mail.awk | /home/myusername/path/to/filter.php" -

The second component is a MySQL database. This is where all the bug data is actually stored after being extracted from the emails. You will need to set up a database with the necessary tables (schemas are in tables.sql). You will also need to rename the example.config.php file to bugmash.config.php and put your database access information into it. While you're at it, also fill in $_ME in the config file with the email address on your Bugzilla account (i.e. what shows up when you go to Bugzilla preferences).

+

The second component is a MySQL database. This is where all the bug data is actually stored after being extracted from the emails. You will need to set up a database with the necessary tables (schemas are in tables.sql). You will also need to rename the example.config.php file to bugmash.config.php and put your database access information into it. While you're at it, also fill in $_ME in the config file with the email address on your Bugzilla account (i.e. what shows up when you go to Bugzilla preferences). If you have multiple Bugzilla accounts you can put them all in.

IMPORTANT! Make sure that the bugmash.config.php file is not in your web server's document root (generally $HOME/www), otherwise your database credentials may be at risk of being snarfed by evil crackers!!

@@ -81,6 +81,8 @@

Known issues

Areas for improvement

diff --git a/README.md b/README.md new file mode 100644 index 0000000..a31a550 --- /dev/null +++ b/README.md @@ -0,0 +1,89 @@ +# Bugmash v0.1 + +A dashboard for quickly viewing and processing high volumes of bugmail. Also comes with the ability to add private notes and tags to bugs, and have those tags show up on Bugzilla itself (via a Scriptish user script). + +* Source code available at: [https://github.com/staktrace/bugmash](https://github.com/staktrace/bugmash) +* Original author: Kartikaya Gupta +* Contributors: maybe you! + +Jump to section: [Quick Setup](#qsetup) | [Detailed Setup](#dsetup) | [Usage](#usage) | [Known Issues](#kissues) | [Areas for improvement](#improve) + +## Quick Setup + +Bugmash is NOT trivial to install. At least, not yet. It also requires that you have some infrastructure set up on your part (database, PHP, etc.) to be able to run the different pieces. Therefore, use of bugmash is not for the faint of heart, and is really only recommended if the volume of bugmail you receive (or want to receive) is stupendous. A good qualification factor might be that you have declared bugmail bankruptcy at least once. Therefore, there is no quick setup. Work your way through the detailed setup instructions if you're still interested. + +## Detailed Setup + +Bugmash has a number of components that work together. The way you set up these components can vary somewhat, but you may need to twiddle a few things here and there to get things to work if you deviate too far from how I've set it up. Ideally as more people try to use it, they will contribute patches to make this easier. As it is, you should probably read through these instructions first before even attempting to set it up. + +First, there is an email scraper. This is contained in the filter.php file. You will need to set up your Bugzilla email such that you can feed the bugmail into this php script. 
The way I have it set up is I have my Bugzilla email account redirect to a mailbox on my webhost account (of the form myusername+bugmash@myaccountserver.dreamhost.com), and then I have a .forward file in my webhost account with a line like the one below: + +`"| /home/myusername/path/to/filter.php"` + +This takes all incoming email and pipes it to the filter.php script. Make sure you mark filter.php as executable, and update the hashbang line at the top to point to your php interpreter. The scraper extracts useful information from Bugzilla emails, and stuffs it into a MySQL database (which is the second component, detailed below). If you want to be able to process PGP/GPG-encrypted bugmail as well, you can additionally pipe the mail through decrypt_mail.awk before piping it to filter.php. This assumes you have no passphrase on your gpg key, such that "gpg --decrypt" will successfully decrypt stdin to stdout. In this case, your .forward file might look like this: + +`"| /home/myusername/path/to/decrypt_mail.awk | /home/myusername/path/to/filter.php"` + +The second component is a MySQL database. This is where all the bug data is actually stored after being extracted from the emails. You will need to set up a database with the necessary tables (schemas are in tables.sql). You will also need to rename the example.config.php file to bugmash.config.php and put your database access information into it. While you're at it, also fill in $_ME in the config file with the email address on your Bugzilla account (i.e. what shows up when you go to Bugzilla preferences). If you have multiple Bugzilla accounts you can put them all in. + +**IMPORTANT!** Make sure that the bugmash.config.php file is not in your web server's document root (generally $HOME/www), otherwise your database credentials may be at risk of being snarfed by evil crackers!! + +So, with these two components, bugmail you receive should be getting parsed and inserted correctly. 
If you are piping emails to the script that are not bugmail, they will be saved to the folder you specify in the config file. If, for whatever reason, the email scraper is unable to parse an actual bugmail, it will save the bugmail, along with a corresponding .err file, to the same directory that filter.php is in. This is so that you don't lose any bugmail, and can improve the scraper to handle the failures. (The most frequent reason this happens is when an unknown bug field changes value.) + +The third component is the front-end, which consists of the common.php, dashboard.php, wipe.php, and search.php files. These should be installed into a directory and served via a normal PHP-supporting webserver (e.g. Apache with mod_php). You will need to modify the $BUGMASH_DIR variable in common.php so that it can find the bugmash.config.php file with the database credentials. Once you have this set up, you can access the dashboard by going to the dashboard.php in your favourite web browser (i.e. Firefox). + +**IMPORTANT!** You should probably lock down access to the dashboard (and the other web-facing .php files) by using .htaccess or some other kind of authentication. Failure to do so could allow evil crackers to walk right in and mark all your bugmail as read!! + +Now, if you have set all of that up, there is one final (optional) component you can install. This consists of the tags.php file, which should be placed in the same web-facing directory as common.php, and the bugtags.user.js user script. This user script can be installed into Scriptish (a Firefox add-on) and modifies Bugzilla bug listing pages (such as the "My Bugs" page, or any search results page) to display and edit your personal bug tags. Before installing the user script into Scriptish, make sure you modify the TAGS_SERVER variable as appropriate so that it points to your web-facing tags.php file. + +## Usage + +As with most power tools, there are many intricacies to using Bugmash. 
These are documented here in favour of keeping the actual interface minimal and fast. + +### Bug blocks + +The primary mode of interaction with Bugmash is through the dashboard.php front-end. This will show all of the data from your bugmail that you have not marked as "viewed" at the time that the page is loaded. Note that the page does not auto-update or auto-refresh; you have to reload manually when you want to view the latest bug information. This is by design because I hate pages that move content around under my cursor. Here is a screenshot of what the dashboard looks like: + +
![Screenshot of the dashboard](bugmash-dashboard-screenshot.png)
+ +All of the data for a given bug is contained inside a "bug block" (the thing with the blue header and footer). Note that if a bug is marked "secure" (so that the bugmail is PGP/GPG-encrypted), the header and border of the bug block are red instead. The header contains the bug number, the bug title, and two links in the top-right corner. The first link will be either "N" (for Note) or "U" (for Update). Clicking on this link will take you to the bottom of the dashboard, to the bug notes section, and allow you to add or edit the corresponding note and tag information for that bug. The "N" link will be displayed when there is no pre-existing note/tag, and the "U" link will be displayed when there is already a pre-existing note/tag for the bug. You can also hover your mouse over the "U" link to get the note/tag information in a tooltip. + +The second link is an "X" that marks the bug block as viewed. Clicking on the X makes the bug block disappear and shifts the column up so that you can click on the X for the next bug block without moving your mouse. Note that when you mark a bug block as viewed, the block is hidden and an XMLHttpRequest is sent to the server; if that XMLHttpRequest fails, the bug block will be made visible again, and the "X" will be replaced with "[E]" (for Error). If this happens, you can click on the "[E]" to try again (it does the same thing as the X). + +The data inside the bug block should be largely self-explanatory; it is basically a compressed version of the data you should be accustomed to seeing in normal bugmail. Note that URLs and bug numbers should be clickable even though they are not blue-and-underlined. This is to reduce visual noise for when you have a lot of them. The footer on the bug block has a link to take you back to the top of the bug block; this is useful for when the bug block is taller than a screen, or if you're on a mobile device with a small screen. 
The other noteworthy thing about bug block data is that comments made by TinderboxPushlog Robot will be greyed out. This allows you to quickly scan a bug block for a randomorange bug and see if there were any "real" comments made. Also something to note is that for better layout of bug data, whitespace in comments is often munged and/or thrown away (this also happens because of the way emails are parsed). + +The bug blocks are arranged into four columns by default (on narrow screens the four columns get stacked into one column such that the leftmost column is on top and the rightmost is at the bottom). The four columns are generally arranged in decreasing order of importance from left to right. The leftmost column contains bugs that have a review requested of you, or contain a response to a review that you requested. The second column is for bugs that are assigned to you, or which you filed. The third column is for bugs you are CC'd on, or have voted on. The final column is for pretty much anything else (usually component-watching). If you would like to change this sorting behaviour, the column() function in dashboard.php is the place to look. + +### Bug notes and tags + +Below all the bug blocks is the bug notes section. Each bug note has a bug number, a note field, a tag field, and the bug title (if available). You can add notes/tags either by using the N/U link on the bug block, or the "Add note" button at the bottom of the section. Use the "Save notes" link to save changes (this will reload the whole page, rather than using XMLHttpRequest). Saving an empty note and tag for a bug removes that row from the notes section. + +The primary difference between notes and tags is that, if you have the bugtags.user.js script installed, the tags will show up in Bugzilla next to the title of the bug on any bug listing page, such as shown in the screenshot below. If you do not have the userscript, then the tags field is redundant. Tags that start with a bang (!) 
will be displayed by the userscript in red, or blue otherwise. You can click on the tag to edit it; bugs with no tags will have a "+" that you can click to add tags. As tag updates from Bugzilla pages are done via XMLHttpRequest, they are shown in yellow while the update request is pending, and might revert to their old value if the update fails. + +
![Screenshot of bugtags on Bugzilla](bugmash-bugtags-screenshot.png)
+ +**WARNING!** Be careful when editing tags from both the Bugzilla userscript interface and the dashboard interface, as you may inadvertently clobber changes you made. In particular this may happen if you make a change from Bugzilla, and then make a change (possibly to a different bug's tag) from the dashboard without reloading it, and then save the dashboard bug notes. + +### Search + +Below the bug notes section is the search field. This is a very bare-bones search that searches through your saved bug data. Search queries are split into tokens on whitespace, and only bugs that match all of the tokens are shown. The results are limited to bugs modified in the last 15 days, and are sorted by time (most recent first). Try not to search for really common substrings because there is no limiting on the number of hits other than recency, and the search implementation isn't particularly efficient - it may bring your server to its knees. + +### Handling errors + +Errors may occur if a bugmail cannot be parsed by the scraper. When this happens, the email and a corresponding .err file are saved into the folder where filter.php resides. The number of .err files is shown in the browser window title bar when you are viewing the dashboard. This allows you to quickly check if there have been any errors. When an error occurs, you'll need to look at the .err file and the email to see why the email could not be parsed, and patch up the filter.php to handle it. Or you could file a bug against the GitHub repo and get me to do it. + +## Known issues + +* The code to strike through obsoleted review requests doesn't seem to be taking effect. Not sure where in the code this bug is, haven't really looked at it. +* When concurrently editing tags via both the user script and the dashboard, it is easy to accidentally clobber tag changes. This should be detected and handled better. 
+* Bugzilla comments which contain "--" on a single line will be prematurely terminated, since that is taken as the end-of-comment marker. +* Review authors whose name/email wraps onto a second line in the email are not handled properly. + +## Areas for improvement + +* Add a quick-reply form to comment on bugs +* Beef up the search feature +* Hook into the Bugzilla API for more awesome +* Prune data from the database periodically +* A way to undo marking bugs as viewed, in case of accidentally marking a bug as viewed +* A way to specify a list of terms to highlight in bug blocks diff --git a/bugtags.user.js b/bugtags.user.js deleted file mode 100644 index 24ffb4d..0000000 --- a/bugtags.user.js +++ /dev/null @@ -1,140 +0,0 @@ -// ==UserScript== -// @id bugtags@staktrace.com -// @name BugTags -// @namespace https://staktrace.com -// @author Kartikaya Gupta https://staktrace.com/ -// @version 1.0 -// @description Allows you tag bugs; the tags are then shown on Bugzilla pages -// @match https://bugzilla.mozilla.org/* -// @run-at document-end -// ==/UserScript== - -var TAGS_SERVER = 'https://example.com/path/to/tags.php'; // point this to your tags.php - -function getUser() { - var links = document.links; - for (var i = 0; i < links.length; i++) { - if (links[i].href.indexOf( "logout" ) > 0) { - var logoutLink = links[i]; - return logoutLink.nextSibling.textContent.trim(); - } - } - return null; -} - -function getBugNumbers() { - var bugnumbers = new Array(); - - var table = document.getElementsByClassName( "bz_buglist" ); - if (table.length != 1) { - return bugnumbers; - } - table = table[0]; - - var rows = table.getElementsByClassName( "bz_bugitem" ); - for (var i = 0; i < rows.length; i++) { - bugnumbers.push( rows[i].id.substring( 1 ) ); - } - return bugnumbers; -} - -function insertBugTags( user, bugnumbers ) { - var reqData = new FormData(); - reqData.append( "user", user ); - reqData.append( "action", "get" ); - reqData.append( "bugs", bugnumbers.join( "," ) ); 
- - GM_xmlhttpRequest({ - method: "POST", - url: TAGS_SERVER, - data: reqData, - onload: function( res ) { - var response = res.responseJSON; - var rows = document.getElementsByClassName( "bz_buglist" )[0].getElementsByClassName( "bz_bugitem" ); - for (var i = 0; i < rows.length; i++) { - var row = rows[i]; - var bugnumber = row.id.substring( 1 ); - var cell = row.cells[ row.cells.length - 1 ]; - var color = 'blue'; - var tags = '+'; - if (response[ bugnumber ]) { - tags = response[ bugnumber ].join( ", " ); - if (tags.charAt( 0 ) == '!') { - tags = tags.substring( 1 ); - color = 'red'; - } - } - var tag = document.createElement( 'a' ); - tag.id = 'bugmash' + bugnumber; - tag.href = '#'; - tag.style.fontSize = 'smaller'; - tag.style.color = color; - tag.textContent = tags; - tag.addEventListener( 'click', updateBugTag, false ); - cell.insertBefore( tag, cell.firstChild ); - } - }, - onerror: function( res ) { - GM_log( "Error fetching bug tags!" ); - GM_log( res.statusText ); - GM_log( res.responseText ); - } - }); -} - -function updateBugTag( e ) { - e.preventDefault(); - - var bugtag = e.target; - var bugnumber = bugtag.id.substring( 7 ); // strip "bugmash" - var tags = bugtag.textContent; - if (tags == '+') { - tags = ''; - } - var origColor = bugtag.style.color; - if (origColor == 'red') { - tags = '!' 
+ tags; - } - tags = prompt( "Enter new tags:", tags ); - if (tags == null) { - return false; - } - - bugtag.style.color = 'yellow'; - - var reqData = new FormData(); - reqData.append( "user", user ); - reqData.append( "action", "set" ); - reqData.append( "bugs", bugnumber ); - reqData.append( "tags", tags ); - - GM_xmlhttpRequest({ - method: "POST", - url: TAGS_SERVER, - data: reqData, - onload: function() { - var color = 'blue'; - if (tags.length > 0) { - if (tags.charAt( 0 ) == '!') { - tags = tags.substring( 1 ); - color = 'red'; - } - bugtag.textContent = tags; - } else { - bugtag.textContent = '+'; - } - bugtag.style.color = color; - }, - onerror: function() { - bugtag.style.color = origColor; - } - }); -} - -var user = getUser(); -if (user) { - var bugnumbers = getBugNumbers(); - if (bugnumbers.length > 0) { - insertBugTags( user, bugnumbers ); - } -} diff --git a/bugtags/bugtags.user.js b/bugtags/bugtags.user.js new file mode 100644 index 0000000..8555a30 --- /dev/null +++ b/bugtags/bugtags.user.js @@ -0,0 +1,231 @@ +// ==UserScript== +// @id bugtags@staktrace.com +// @name BugTags +// @namespace https://staktrace.com +// @author Kartikaya Gupta https://staktrace.com/ +// @version 1.0 +// @description Allows you tag bugs; the tags are then shown on Bugzilla pages +// @match https://bugzilla.mozilla.org/* +// @run-at document-end +// @grant GM_log +// @grant GM_xmlhttpRequest +// ==/UserScript== + +var TAGS_SERVER = 'https://bugmash.staktrace.com/tags.php'; // point this to your tags.php + +function GM_log( thing ) { + console.log( thing ); +} + +function GM_xmlhttpRequest( blob ) { + var xhr = new XMLHttpRequest(); + xhr.onload = function() { + blob.onload( xhr ); + }; + xhr.onerror = function() { + blob.onerror( xhr ); + }; + xhr.open( blob.method, blob.url, true ); + xhr.send( blob.data ); +} + +function getUser() { + var login = document.querySelector('td#moz_login .anchor'); + if (login != null) { + return login.textContent.trim(); + } + var newLogin = 
document.querySelector('.email'); + if (newLogin != null) { + return newLogin.textContent.trim(); + } + var links = document.links; + for (var i = 0; i < links.length; i++) { + if (links[i].href.indexOf( "logout" ) > 0) { + var logoutLink = links[i]; + return logoutLink.nextSibling.textContent.trim(); + } + } + return null; +} + +function getListBugNumbers() { + var bugnumbers = new Array(); + + var table = document.getElementsByClassName( "bz_buglist" ); + if (table.length != 1) { + return bugnumbers; + } + table = table[0]; + + var rows = table.getElementsByClassName( "bz_bugitem" ); + for (var i = 0; i < rows.length; i++) { + bugnumbers.push( rows[i].id.substring( 1 ) ); + } + return bugnumbers; +} + +function getTreeBugNumbers() { + var bugnumbers = new Array(); + + var tree = document.getElementsByClassName( "tree" ); + if (tree.length != 1) { + return bugnumbers; + } + tree = tree[0]; + + var nodes = tree.getElementsByClassName( "summ_deep" ); + for (var i = 0; i < nodes.length; i++) { + bugnumbers.push( nodes[i].id ); + } + nodes = tree.getElementsByClassName( "summ" ); + for (var i = 0; i < nodes.length; i++) { + bugnumbers.push( nodes[i].id ); + } + return bugnumbers; +} + +function buildTag( response, bugnumber ) { + var color = 'blue'; + var tags = '+'; + if (response[ bugnumber ]) { + tags = response[ bugnumber ].join( ", " ); + if (tags.charAt( 0 ) == '!') { + tags = tags.substring( 1 ); + color = 'red'; + } + } + var tag = document.createElement( 'a' ); + tag.id = 'bugmash' + bugnumber; + tag.href = '#'; + tag.style.fontSize = 'smaller'; + tag.style.color = color; + tag.textContent = tags; + tag.addEventListener( 'click', updateBugTag, false ); + return tag; +} + +function insertListBugTags( user, bugnumbers ) { + var reqData = new FormData(); + reqData.append( "user", user ); + reqData.append( "action", "get" ); + reqData.append( "bugs", bugnumbers.join( "," ) ); + + var arg = { + method: "POST", + url: TAGS_SERVER, + data: reqData, + onload: 
function( res ) { + var response = JSON.parse( res.responseText ); + var rows = document.getElementsByClassName( "bz_buglist" )[0].getElementsByClassName( "bz_bugitem" ); + for (var i = 0; i < rows.length; i++) { + var row = rows[i]; + var bugnumber = row.id.substring( 1 ); + var tag = buildTag( response, bugnumber ); + var cell = row.cells[ row.cells.length - 1 ]; + cell.insertBefore( tag, cell.firstChild ); + } + }, + onerror: function( res ) { + GM_log( "Error fetching bug tags!" ); + GM_log( res.status ); + GM_log( res.responseText ); + } + }; + GM_xmlhttpRequest(arg); +} + +function insertTreeBugTags( user, bugnumbers ) { + var reqData = new FormData(); + reqData.append( "user", user ); + reqData.append( "action", "get" ); + reqData.append( "bugs", bugnumbers.join( "," ) ); + + GM_xmlhttpRequest({ + method: "POST", + url: TAGS_SERVER, + data: reqData, + onload: function( res ) { + var response = JSON.parse( res.responseText ); + var nodes = document.getElementsByClassName( "tree" )[0].getElementsByClassName( "summ_deep" ); + for (var i = 0; i < nodes.length; i++) { + var node = nodes[i]; + var bugnumber = node.id; + var tag = buildTag( response, bugnumber ); + node.insertBefore( tag, node.lastElementChild ); + } + nodes = document.getElementsByClassName( "tree" )[0].getElementsByClassName( "summ" ); + for (var i = 0; i < nodes.length; i++) { + var node = nodes[i]; + var bugnumber = node.id; + var tag = buildTag( response, bugnumber ); + node.insertBefore( tag, node.lastElementChild ); + } + }, + onerror: function( res ) { + GM_log( "Error fetching bug tags!" ); + GM_log( res.status ); + GM_log( res.responseText ); + } + }); +} + +function updateBugTag( e ) { + e.preventDefault(); + + var bugtag = e.target; + var bugnumber = bugtag.id.substring( 7 ); // strip "bugmash" + var tags = bugtag.textContent; + if (tags == '+') { + tags = ''; + } + var origColor = bugtag.style.color; + if (origColor == 'red') { + tags = '!' 
+ tags; + } + tags = prompt( "Enter new tags:", tags ); + if (tags == null) { + return false; + } + + bugtag.style.color = 'yellow'; + + var reqData = new FormData(); + reqData.append( "user", user ); + reqData.append( "action", "set" ); + reqData.append( "bugs", bugnumber ); + reqData.append( "tags", tags ); + + GM_xmlhttpRequest({ + method: "POST", + url: TAGS_SERVER, + data: reqData, + onload: function() { + var color = 'blue'; + if (tags.length > 0) { + if (tags.charAt( 0 ) == '!') { + tags = tags.substring( 1 ); + color = 'red'; + } + bugtag.textContent = tags; + } else { + bugtag.textContent = '+'; + } + bugtag.style.color = color; + }, + onerror: function() { + bugtag.style.color = origColor; + } + }); +} + +var user = getUser(); +if (user) { + var bugnumbers = getListBugNumbers(); + if (bugnumbers.length > 0) { + insertListBugTags( user, bugnumbers ); + } + bugnumbers = getTreeBugNumbers(); + if (bugnumbers.length > 0) { + insertTreeBugTags( user, bugnumbers ); + } +} diff --git a/bugtags/manifest.json b/bugtags/manifest.json new file mode 100644 index 0000000..3c62015 --- /dev/null +++ b/bugtags/manifest.json @@ -0,0 +1,18 @@ +{ + "manifest_version": 2, + "name": "BugTags", + "version": "1.2", + + "description": "Displays tags associated with bugs on bugzilla pages.", + + "content_scripts": [ + { + "matches": ["https://bugzilla.mozilla.org/*"], + "js": ["bugtags.user.js"] + } + ], + + "permissions": [ + "https://bugmash.staktrace.com/*" + ] +} diff --git a/nginx/nginx.conf b/nginx/nginx.conf new file mode 100644 index 0000000..c8f2a51 --- /dev/null +++ b/nginx/nginx.conf @@ -0,0 +1,39 @@ +server { + listen [::]:80; + listen 80; + server_name bugmash.staktrace.com; + root /ebs/bugmash/www; + + location /.well-known { + try_files $uri $uri/ =404; + } + + location / { + return 301 https://$host$request_uri; + } +} + +server { + listen [::]:443 ssl; + listen 443 ssl; + server_name bugmash.staktrace.com; + root /ebs/bugmash/www; + index index.php index.html 
index.htm; + + include includes/ssl.inc; + ssl_certificate /usr/local/etc/letsencrypt/live/bugmash.staktrace.com/fullchain.pem; + ssl_certificate_key /usr/local/etc/letsencrypt/live/bugmash.staktrace.com/privkey.pem; + + location / { + try_files $uri $uri/ =404; + } + + location ~ \.php$ { + auth_basic "Bugmash"; + auth_basic_user_file "/ebs/bugmash/www-conf/bugmash.htpass"; + try_files $uri =404; + include includes/php-inner.inc; + } + + include includes/error.inc; +} diff --git a/tables.sql b/schemas/tables.sql similarity index 59% rename from tables.sql rename to schemas/tables.sql index 9bff92e..0307a18 100644 --- a/tables.sql +++ b/schemas/tables.sql @@ -11,7 +11,7 @@ CREATE TABLE `requests` ( KEY (`bug`), KEY (`stamp`), KEY (`viewed`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `reviews` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, @@ -29,7 +29,7 @@ CREATE TABLE `reviews` ( KEY (`bug`), KEY (`stamp`), KEY (`viewed`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `changes` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, @@ -38,13 +38,13 @@ CREATE TABLE `changes` ( `viewed` tinyint(1) NOT NULL DEFAULT 0, `reason` varchar(10) NOT NULL, `field` varchar(255) NOT NULL, - `oldval` varchar(255) NOT NULL, - `newval` varchar(255) NOT NULL, + `oldval` varchar(1024) NOT NULL, + `newval` varchar(1024) NOT NULL, PRIMARY KEY (`id`), KEY (`bug`), KEY (`stamp`), KEY (`viewed`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `comments` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, @@ -59,7 +59,7 @@ CREATE TABLE `comments` ( KEY (`bug`), KEY (`stamp`), KEY (`viewed`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `newbugs` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, @@ -67,31 +67,61 @@ CREATE TABLE 
`newbugs` ( `stamp` datetime NOT NULL, `viewed` tinyint(1) NOT NULL DEFAULT 0, `reason` varchar(10) NOT NULL, - `title` varchar(255) NOT NULL, + `title` varchar(1024) NOT NULL, `author` varchar(255) NOT NULL, `description` mediumtext NOT NULL, PRIMARY KEY (`id`), KEY (`bug`), KEY (`stamp`), KEY (`viewed`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; + +CREATE TABLE `gh_issues` ( + `id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `repo` varchar(100) NOT NULL, + `issue` varchar(40) NOT NULL, + `stamp` datetime NOT NULL, + `viewed` tinyint(1) NOT NULL DEFAULT 0, + `reason` varchar(10) NOT NULL, + `hash` varchar(255) NOT NULL, + `author` varchar(255) NOT NULL, + `comment` mediumtext NOT NULL, + PRIMARY KEY (`id`), + KEY (`repo`, `issue`), + KEY (`stamp`), + KEY (`viewed`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; + +CREATE TABLE `phab_diffs` ( + `id` int(10) unsigned NOT NULL AUTO_INCREMENT, + `revision` varchar(32) NOT NULL, + `stamp` datetime NOT NULL, + `viewed` tinyint(1) NOT NULL DEFAULT 0, + `reason` varchar(10) NOT NULL, + `author` varchar(255) NOT NULL, + `comment` mediumtext NOT NULL, + PRIMARY KEY (`id`), + KEY (`revision`), + KEY (`stamp`), + KEY (`viewed`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `metadata` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `bug` int(10) unsigned NOT NULL DEFAULT 0, + `bug` varchar(128) NOT NULL, `stamp` datetime NOT NULL, - `title` varchar(255) NOT NULL DEFAULT '', + `title` varchar(1024) NOT NULL DEFAULT '', `secure` tinyint(1) NOT NULL DEFAULT 0, - `note` mediumtext NOT NULL DEFAULT '', + `note` mediumtext NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY (`bug`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; CREATE TABLE `tags` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `bug` int(10) unsigned NOT NULL, - `tag` varchar(255) NOT NULL, + `bug` varchar(128) NOT NULL, 
+ `tag` varchar(100) NOT NULL, PRIMARY KEY (`id`), KEY (`bug`), KEY (`tag`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPACT; diff --git a/scraper/Cargo.lock b/scraper/Cargo.lock new file mode 100644 index 0000000..f90f587 --- /dev/null +++ b/scraper/Cargo.lock @@ -0,0 +1,696 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "arrayref" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "atoi" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "base64" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bit-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bitflags" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "block-buffer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bufstream" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "byte-tools" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "byteorder" +version = "1.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cc" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cfg-if" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "chrono" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cloudabi" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "digest" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "flate2" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "miniz-sys 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fnv" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "fuchsia-zircon" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + 
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fuchsia-zircon-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "generic-array" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "idna" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "itoa" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.43" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "mailparse" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", + "quoted_printable 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "maplit" +version = "1.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "miniz-sys" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "mysql" +version = "14.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bufstream 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "flate2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "mysql_common 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", + "named_pipe 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", + "nix 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", + "twox-hash 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "url 1.7.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "mysql_common" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "atoi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", + "bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num-bigint 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", + "sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", + "sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "twox-hash 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "uuid 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "named_pipe" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "net2" +version = "0.2.33" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "nix" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-bigint" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "percent-encoding" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "quoted_printable" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rand" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 
0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "redox_syscall" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "regex" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ryu" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "safemem" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "scraper" +version = "0.1.0" +dependencies = [ + "mailparse 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "mysql 14.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "victoria-dom 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_json" +version = "1.0.27" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "sha1" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "sha2" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "digest 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", + "fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "smallvec" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "time" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "twox-hash" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "typenum" +version = "1.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ucd-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-bidi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "url" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "uuid" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "version_check" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "victoria-dom" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "void" +version = "1.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "68f56c7353e5a9547cbd76ed90f7bb5ffc3ba09d4ea9bd1d8c06c8b1142eeb5a" +"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" +"checksum atoi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "baa5dc0129ce09c8c87e2714a0b67c095d4a5e3261a7bbb3d7ac44d43d5dd190" +"checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9" +"checksum bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4440d5cb623bb7390ae27fec0bb6c61111969860f8e3ae198bfa0663645e67cf" +"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" +"checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab" +"checksum bufstream 
0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f2f382711e76b9de6c744cc00d0497baba02fb00a787f088c879f01d09468e32" +"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40" +"checksum byteorder 1.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "90492c5858dd7d2e78691cfb89f90d273a2800fc11d98f60786e5d87e2f83781" +"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" +"checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" +"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" +"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" +"checksum digest 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "5b29c278aa8fd30796bd977169e8004b4aa88cdcd2f32a6eb22bc2d5d38df94a" +"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" +"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" +"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" +"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" +"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" +"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" +"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" +"checksum flate2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "37847f133aae7acf82bb9577ccd8bda241df836787642654286e79679826a54b" +"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" +"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" +"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" +"checksum generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ef25c5683767570c2bbd7deba372926a55eaae9982d7726ee2a1050239d45b9d" +"checksum idna 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e" +"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca488b89a5657b0a2ecd45b95609b3e848cf1755da332a0da46e2b2b1cb371a7" +"checksum libc 0.2.43 
(registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d" +"checksum mailparse 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d1d5ea71343df3c508b245824ec17f5c6f86f79f99ce999bf6ac51edf0465685" +"checksum maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +"checksum memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3b4142ab8738a78c51896f704f83c11df047ff1bda9a92a661aa6361552d93d" +"checksum miniz-sys 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "609ce024854aeb19a0ef7567d348aaa5a746b32fb72e336df7fcc16869d7e2b4" +"checksum mysql 14.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e5637e744f6b4512cb89e203b04af2bd2157807561bc00a6de29ff5dc9df5d25" +"checksum mysql_common 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4b24fd8e687c238438e3a62cfd5c22558be340077eb1597f02c20be29bad617e" +"checksum named_pipe 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ed10a5ac4f5f7e5d75552b12c1d5d542debca81e573279dd1e4c19fde6efa6d" +"checksum net2 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "42550d9fb7b6684a6d404d9fa7250c2eb2646df731d1c06afc06dcee9e1bcf88" +"checksum nix 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d37e713a259ff641624b6cb20e3b12b2952313ba36b6823c0f16e6cfd9e5de17" +"checksum num-bigint 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3eceac7784c5dc97c2d6edf30259b4e153e6e2b42b3c85e9a6e9f45d06caef6e" +"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea" +"checksum num-traits 0.2.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "630de1ef5cc79d0cdd78b7e33b81f083cbfe90de0f4b2b2f07f905867c70e9fe" +"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" +"checksum quoted_printable 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4126fa98c6d7b166e6a29a24ab96721d618759d803df6a8cb35d6140da475b5a" +"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" +"checksum rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edecf0f94da5551fc9b492093e30b041a891657db7940ee221f9d2f66e82eef2" +"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" +"checksum regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2069749032ea3ec200ca51e4a31df41759190a88edca0d2d86ee8bedf7073341" +"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d" +"checksum ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7153dd96dade874ab973e098cb62fcdbb89a03682e46b144fd09550998d4a4a7" +"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" +"checksum serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)" = "92ec94e2754699adddbbc4f555791bd3acc2a2f5574cba16c93a4a9cf4a04415" +"checksum serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "59790990c5115d16027f00913e2e66de23a51f70422e549d2ad68c8c5f268f1c" +"checksum sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" +"checksum sha2 0.7.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0" +"checksum smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "153ffa32fd170e9944f7e0838edf824a754ec4c1fc64746fcc9fe1f8fa602e5d" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" +"checksum twox-hash 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4f85be565a110ed72ed7048cf56570db04ce0a592c98aa59b7dacde3e5718750" +"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169" +"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d" +"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +"checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +"checksum url 1.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2a321979c09843d272956e73700d12c4e7d3d92b2ee112b31548aef0d4efc5a6" +"checksum utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd70f467df6810094968e2fce0ee1bd0e87157aceb026a8c083bcf5e25b9efe4" +"checksum uuid 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e1436e58182935dcd9ce0add9ea0b558e8a87befe01c1a301e6020aeb0876363" +"checksum version_check 0.1.4 
(registry+https://github.com/rust-lang/crates.io-index)" = "7716c242968ee87e5542f8021178248f267f295a5c4803beae8b8b7fd9bc6051" +"checksum victoria-dom 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "27ef2e353e03efe070c1316f2d04fba6804a40edb694951d6b7fbcbe11283a02" +"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" +"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/scraper/Cargo.toml b/scraper/Cargo.toml new file mode 100644 index 0000000..9eb2e86 --- /dev/null +++ b/scraper/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "scraper" +version = "0.1.0" +authors = ["Kartikaya Gupta "] + +[dependencies] +mailparse = "0.6.2" +mysql = "14.1.0" +victoria-dom = "0.1.2" +regex = "1" diff --git a/example.config.php b/scraper/config.php.sample similarity index 63% rename from example.config.php rename to scraper/config.php.sample index 66315e0..eb4d423 100644 --- a/example.config.php +++ b/scraper/config.php.sample @@ -1,6 +1,6 @@ /dev/null 2>&1 +for i in $(grep -l "Prepared statement needs to be re-prepared" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +for i in $(grep -l "Can't connect to MySQL 
server on 'db.staktrace.com'" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +for i in $(grep -l "Lost connection to MySQL server at 'reading authorization packet'" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +for i in $(grep -l "MySQL server has gone away" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +for i in $(grep -l "Error connecting to db:" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +for i in $(grep -l "into DB" *.err 2>/dev/null); do + ./pump.sh ${i%.err} + rm $i ${i%.err} +done +popd >/dev/null 2>&1 diff --git a/decrypt_mail.awk b/scraper/decrypt_mail.awk similarity index 100% rename from decrypt_mail.awk rename to scraper/decrypt_mail.awk diff --git a/filter.php b/scraper/filter.php old mode 100644 new mode 100755 similarity index 63% rename from filter.php rename to scraper/filter.php index 8ef4819..7fac33d --- a/filter.php +++ b/scraper/filter.php @@ -1,7 +1,7 @@ -#!/usr/local/bin/php +#!/usr/bin/env php ' ); + exit( 0 ); + } } function fail( $message ) { @@ -73,16 +85,10 @@ function success() { $mail = $merged; $bugIsSecure = false; -$fromDomain = substr( $_SERVER['SENDER'], strpos( $_SERVER['SENDER'], '@' ) + 1 ); $bugzillaHeaders = array(); foreach ($mail as $mailLine) { if (strlen( $mailLine ) == 0) { - $matches = array(); - if (preg_match( '/\[Bug \d+\] \(Secure bug/', $bugzillaHeaders[ 'subject' ], $matches ) == 0) { - break; - } - $bugIsSecure = true; - // continue processing since there will be another subject header in the body, followed by another blank line + break; } if (strpos( $mailLine, 'X-Bugzilla-' ) === 0) { $header = substr( $mailLine, strlen( 'X-Bugzilla-' ) ); @@ -103,6 +109,10 @@ function success() { } } +if (checkForField( 'Secure-Email' )) { + $bugIsSecure = true; +} + function checkForField( $key ) { global $bugzillaHeaders; $key = strtolower( $key ); @@ -144,7 +154,7 @@ function normalizeFieldList( $fieldString ) { $currentField = 
''; for ($i = 0; $i < count( $words ); $i++) { $word = $words[ $i ]; - if ($word == 'Attachment' /* Created|#abcdef */) { + if ($word == 'Attachment' /* Created|Flags|is|mime */) { if ($i + 1 >= count( $words )) { fail( 'Unrecognized field list (1): ' . print_r( $words, true ) ); } @@ -153,25 +163,24 @@ function normalizeFieldList( $fieldString ) { // ignore "Attachment Created" in the field list since it doesn't have // a corresponding entry in the field table continue; - } else { - /* Flags|is|mime */ + } else if ($words[ $i ] == 'is' /* obsolete */ || $words[ $i ] == 'mime' /* type */) { if ($i + 1 >= count( $words )) { - fail( 'Unrecognized field list (2): ' . print_r( $words, true ) ); + fail( 'Unrecognized field list (3): ' . print_r( $words, true ) ); } $word .= ' ' . $words[ ++$i ]; - if ($words[ $i ] == 'is' /* obsolete */ || $words[ $i ] == 'mime' /* type */) { - if ($i + 1 >= count( $words )) { - fail( 'Unrecognized field list (3): ' . print_r( $words, true ) ); - } - $word .= ' ' . $words[ ++$i ]; - } } } else if ($word == 'Depends' /* On */ || $word == 'Target' /* Milestone */ || $word == 'Ever' /* Confirmed */ || $word == 'Crash' /* Signature */ || $word == 'See' /* Also */ - || $word == 'Last' /* Resolved */) + || $word == 'Last' /* Resolved */ + || $word == 'QA' /* Whiteboard */ + || $word == 'Restrict' /* Comments */ + || $word == 'Due' /* Date */ + || $word == 'Fission' /* Milestone */ + || $word == 'Regressed' /* by */ + || $word == 'Webcompat' /* Priority */) { if ($i + 1 >= count( $words )) { fail( 'Unrecognized field list (4): ' . print_r( $words, true ) ); @@ -179,12 +188,42 @@ function normalizeFieldList( $fieldString ) { $word .= ' ' . $words[ ++$i ]; } else if ($word == 'Status') { if ($i + 1 < count( $words ) && $words[ $i + 1 ] == 'Whiteboard') { - $word .= ' '. $words[ ++$i ]; + $word .= ' ' . 
$words[ ++$i ]; } - } else if ($word == 'Comment') { - if ($i + 3 < count( $words) && $words[ $i + 2 ] == 'is' && $words[ $i + 3 ] == 'private') { + } else if ($word == 'Comment' /* Created | # is private */) { + if ($i + 1 < count( $words ) && $words[ $i + 1 ] == 'Created') { + // ignore "Comment Created" in the field list since it doesn't have + // a corresponding entry in the field table + $i++; + continue; + } else if ($i + 3 < count( $words ) && $words[ $i + 2 ] == 'is' && $words[ $i + 3 ] == 'private') { $word .= ' ' . $words[ ++$i ] . ' ' . $words[ ++$i ] . ' ' . $words[ ++$i ]; } + } else if ($word == 'User') { + if ($i + 1 < count( $words ) && $words[ $i + 1 ] == 'Story') { + $word .= ' ' . $words[ ++$i ]; + } + } else if ($word == 'Has' /* STR|Regression Range */) { + if ($i + 1 < count( $words ) && $words[ $i + 1 ] == 'STR') { + $word .= ' ' . $words[ ++$i ]; + } else if ($i + 2 < count( $words ) && $words[ $i + 1 ] == 'Regression' && $words[ $i + 2 ] == 'Range') { + $word .= ' ' . $words[ ++$i ] . ' ' . $words[ ++$i ]; + } + } + if ($word == 'Ever Confirmed') { + $word = 'Ever confirmed'; + } else if ($word == 'OS/Version') { + $word = 'OS'; + } else if ($word == 'Platform') { + $word = 'Hardware'; + } else if ($word == 'AssignedTo') { + $word = 'Assignee'; + } else if ($word == 'Status Whiteboard') { + $word = 'Whiteboard'; + } else if ($word == 'QAContact') { + $word = 'QA Contact'; + } else if ($word == 'Last Resolved') { + $word = 'Closed'; } $fields[] = $word; } @@ -208,8 +247,23 @@ function parseChangeTable( $fields, $rows ) { continue; } $matchedStart = false; - if (strpos( $fields[ $ixField ], $col1 ) === 0) { + if (preg_match( '/^Attachment/', $col1 )) { + // Sometimes the attachment number is on the second line. Make sure we pick it up + $col1 .= ' ' . 
trim( substr( $rows[$i+1], 0, $widths[0] ) ); + // Special case for flags, because the changed header just has "Flags" instead of "Attachment Flags" + if ($fields[ $ixField ] == 'Flags' && preg_match( '/^Attachment #\d+ Flags/', $col1 )) { + $fields[ $ixField ] = $col1; + $matchedStart = true; + // Otherwise insert the attachment number into $fields[$ixField] + } else if (stripos( $fields[ $ixField ], preg_replace( '/Attachment #\d+/i', 'Attachment', $col1 ) ) === 0) { + $fields[ $ixField ] = preg_replace( '/(Attachment #\d+).*/', '\1', $col1 ) . substr( $fields[ $ixField ], strlen( 'Attachment' )); + $matchedStart = true; + } + } else if (stripos( $fields[ $ixField ], $col1 ) === 0) { + // simple case match the start of the field against the column $matchedStart = true; + } + if ($matchedStart) { if ($ixField > 0) { $oldvals[] = trim( $oldval ); $newvals[] = trim( $newval ); @@ -217,7 +271,7 @@ function parseChangeTable( $fields, $rows ) { $oldval = $col2; $newval = $col3; } - if (strpos( $fields[ $ixField ], $col1 ) === strlen( $fields[ $ixField ] ) - strlen( $col1 )) { + if (stripos( $fields[ $ixField ], $col1 ) === strlen( $fields[ $ixField ] ) - strlen( $col1 )) { if (! 
$matchedStart) { $oldval .= $col2; $newval .= $col3; @@ -246,7 +300,7 @@ function insertChanges( $bug, $date, $reason, &$fields, &$oldvals, &$newvals ) { } } -function saveChanges( $bug, $date, $reason, &$mailString ) { +function saveChanges( $bug, $date, $reason, &$mailString, $requireTable ) { $ret = 0; $fields = normalizeFieldList( getField( 'changed-fields' ) ); if (count( $fields ) == 0) { @@ -256,7 +310,16 @@ function saveChanges( $bug, $date, $reason, &$mailString ) { $matches = array(); $matchCount = preg_match_all( "/\n( *What *\|Removed *\|Added\n-*\n.*?)\n\n/s", $mailString, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE ); if ($matchCount == 0) { - fail( 'No change table' ); + // we might end up here in some cases if the only "field" that changed + // is a comment privacy flag + if (count( $fields ) == 1 && strpos( $fields[0], " is private" ) !== FALSE) { + return $ret; + } + if ($requireTable) { + fail( 'No change table' ); + } else { + return $ret; + } } $tableRows = $matches[1][0][0]; $ret = max( $ret, $matches[0][0][1] + strlen( $matches[0][0][0] ) ); @@ -273,11 +336,15 @@ function saveChanges( $bug, $date, $reason, &$mailString ) { function saveComments( $bug, $date, $reason, &$mailString ) { $matches = array(); - $matchCount = preg_match_all( "/- Comment #(\d+) from ([^<]*) <[^\n]* ---\n(.*)\n\n--/sU", $mailString, $matches, PREG_PATTERN_ORDER ); + $matchCount = preg_match_all( "/- Comment #(\d+) from ([^\n]*) ---\n(.*)\n\n--/sU", $mailString, $matches, PREG_PATTERN_ORDER ); $stmt = prepare( 'INSERT INTO comments (bug, stamp, reason, commentnum, author, comment) VALUES (?, ?, ?, ?, ?, ?)' ); for ($i = 0; $i < $matchCount; $i++) { $commentNum = $matches[1][$i]; $author = $matches[2][$i]; + $author = substr( $author, 0, 0 - strlen( 'YYYY-mm-dd HH:ii::ss ZZZ' ) ); + if (strpos( $author, '<' ) !== FALSE) { + $author = substr( $author, 0, strpos( $author, '<' ) ); + } $comment = $matches[3][$i]; $stmt->bind_param( 'ississ', $bug, $date, $reason, 
$commentNum, $author, $comment ); $stmt->execute(); @@ -285,38 +352,41 @@ function saveComments( $bug, $date, $reason, &$mailString ) { fail( 'Unable to insert new comment into DB: ' . $stmt->error ); } } + return $matchCount; } function saveDependencyChanges( $bug, $date, $reason, &$mailString ) { - $matches = array(); - if (preg_match( '/Bug (\d+) depends on bug (\d+), which changed state./', $mailString, $matches ) == 0) { - return false; - } - if (strcmp( $bug, $matches[1] ) != 0) { - fail( 'Dependency email did not match bug number' ); - } - $dependentBug = $matches[2]; + $offset = 0; + while (true) { + $matches = array(); + if (preg_match( '/Bug (\d+) depends on bug (\d+), which changed state./', $mailString, $matches, 0, $offset ) == 0) { + break; + } + if (strcmp( $bug, $matches[1] ) != 0) { + fail( 'Dependency email did not match bug number' ); + } + $dependentBug = $matches[2]; - // in this case we don't know the list of field names ahead of time, just that it will be one or more Status - // and Resolution fields. Since these won't line wrap, we can just do a simpler version of parseChangeTable - $matches = array(); - $matchCount = preg_match_all( "/\n *What *\|Old Value *\|New Value\n-*\n(.*?)\n\n/s", $mailString, $matches, PREG_PATTERN_ORDER ); - if ($matchCount != 1) { - fail( 'Found ' . $matchCount . ' change tables in a dependency change email' ); - } - $tableRows = explode( "\n", $matches[1][0] ); - $fields = array(); - $oldvals = array(); - $newvals = array(); - foreach ($tableRows AS $row) { - list( $field, $oldval, $newval ) = explode( '|', $row ); - $fields[] = 'depbug-' . $dependentBug . '-' . trim( $field ); - $oldvals[] = trim( $oldval ); - $newvals[] = trim( $newval ); + // in this case we don't know the list of field names ahead of time, just that it will be one or more Status + // and Resolution fields. 
Since these won't line wrap, we can just do a simpler version of parseChangeTable + $matches = array(); + if (preg_match( "/\n *What *\|Removed *\|Added\n-*\n(.*?)\n\n/s", $mailString, $matches, PREG_OFFSET_CAPTURE, $offset ) == 0) { + fail( 'Did not find change table corresponding to dependency change for bug ' . $dependentBug ); + } + $tableRows = explode( "\n", $matches[1][0] ); + $fields = array(); + $oldvals = array(); + $newvals = array(); + foreach ($tableRows AS $row) { + list( $field, $oldval, $newval ) = explode( '|', $row ); + $fields[] = 'depbug-' . $dependentBug . '-' . trim( $field ); + $oldvals[] = trim( $oldval ); + $newvals[] = trim( $newval ); + } + insertChanges( $bug, $date, $reason, $fields, $oldvals, $newvals ); + $offset = $matches[1][1] + strlen( $matches[1][0] ); } - insertChanges( $bug, $date, $reason, $fields, $oldvals, $newvals ); - - return true; + return ($offset > 0); } function prepare( $query ) { @@ -338,20 +408,28 @@ function updateMetadata( $date ) { global $bugIsSecure; $matches = array(); if (preg_match( '/\[Bug (\d+)\] (.*)( : \[Attachment.*)?$/sU', getField( 'subject' ), $matches ) > 0) { - $stmt = prepare( 'INSERT INTO metadata (bug, stamp, title, secure) VALUES (?, ?, ?, ?) ' + $stmt = prepare( 'INSERT INTO metadata (bug, stamp, title, secure, note) VALUES (?, ?, ?, ?, ?) ' . 
'ON DUPLICATE KEY UPDATE stamp=VALUES(stamp), title=VALUES(title), secure=VALUES(secure)' ); - $stmt->bind_param( 'issi', $matches[1], $date, $matches[2], $bugIsSecure ); - $stmt->execute(); + $note = ""; + if (!$stmt->bind_param( 'sssis', $matches[1], $date, $matches[2], $bugIsSecure, $note )) { + fail( "Binding params failed for metadata: [{$stmt->error}]" ); + } + if (!$stmt->execute()) { + fail( "Executing statement failed for metadata: [{$stmt->error}]" ); + } } } -$bug = getField( 'id' ); $type = getField( 'type' ); +if ($type == 'nag') { + success(); +} +$bug = getField( 'id' ); $date = date( 'Y-m-d H:i:s', getField( 'date' ) ); updateMetadata( $date ); -if (strpos( $mailText, 'This email would have contained sensitive information' ) !== FALSE) { +if ($bugIsSecure) { // you haven't set a PGP/GPG key and this is for a secure bug, so there's no data in it. $reason = normalizeReason( getField( 'reason' ), getField( 'watch-reason' ) ); $fields = array( 'Unknown' ); @@ -360,19 +438,26 @@ function updateMetadata( $date ) { insertChanges( $bug, $date, $reason, $fields, $oldvals, $newvals ); success(); } else if ($type == 'request') { - $matches = array(); + $subject = getField( 'subject' ); + $matches = array(); if (preg_match( '/\[Attachment (\d+)\]/', $mailText, $matches ) == 0) { - fail( 'No attachment id' ); - } - $attachment = $matches[1]; - - if (preg_match( "/Attachment $attachment: (.*)/", $mailString, $matches ) == 0) { - fail( 'No attachment title' ); + if (strpos( $subject, 'needinfo ' ) === FALSE) { + fail( 'Unrecognized request bugmail' ); + } + if (strpos( $subject, 'needinfo requested: [Bug' ) !== FALSE) { + $bugzillaHeaders['flag-requestee'] = $_ME[0]; + } + $attachment = 0; + $title = ''; + } else { + $attachment = $matches[1]; + if (preg_match( "/Attachment $attachment:[ \n](.*)/", $mailString, $matches ) == 0) { + fail( 'No attachment title' ); + } + $title = $matches[1]; } - $title = $matches[1]; - $subject = getField( 'subject' ); if (! 
checkForField( 'flag-requestee' )) { if (strpos( $subject, ' canceled: [Bug' ) !== FALSE) { $granted = 0; @@ -389,13 +474,13 @@ function updateMetadata( $date ) { $flag = substr( $subject, 0, strpos( $subject, ' ' ) ); if ($cancelled) { - $stmt = prepare( 'UPDATE requests SET cancelled=? WHERE attachment=? AND flag=?' ); - $stmt->bind_param( 'iis', $cancelled, $attachment, $flag ); + $stmt = prepare( 'UPDATE requests SET cancelled=? WHERE bug=? AND attachment=? AND flag=?' ); + $stmt->bind_param( 'iiis', $cancelled, $bug, $attachment, $flag ); $stmt->execute(); // this may cancel something we don't have a record of; if so, ignore success(); } else { - if (preg_match( "/\n(.*) <(.*)> has (?:not )?granted/", $mailString, $matches ) == 0) { + if (preg_match( "/\n\n(.*) <(.*)>\\shas\\s(?:not\\s)?granted/s", $mailString, $matches ) == 0) { fail( 'Unable to determine author of review' ); } $author = $matches[1]; @@ -409,7 +494,7 @@ function updateMetadata( $date ) { } } else { $requestee = getField( 'flag-requestee' ); - if ($requestee != $_ME) { + if (! in_array( $requestee, $_ME )) { fail( 'Requestee is not me' ); } $flag = substr( $subject, 0, strpos( $subject, ' ' ) ); @@ -434,12 +519,12 @@ function updateMetadata( $date ) { $title = trim( $matches[1] ); $author = getField( 'who' ); $matches = array(); - if (preg_match( "/Bug #: .*?\n\n\n(.*\n\n)?-- \n/s", $mailString, $matches ) == 0) { + if (preg_match( "/Bug ID: .*?\n\n(.*\n\n)?-- \n/s", $mailString, $matches ) == 0) { fail( 'No description' ); } $desc = trim( $matches[1] ); - $extracted = saveChanges( $bug, $date, $reason, $desc ); + $extracted = saveChanges( $bug, $date, $reason, $desc, false ); $desc = trim( substr( $desc, $extracted ) ); $stmt = prepare( 'INSERT INTO newbugs (bug, stamp, reason, title, author, description) VALUES (?, ?, ?, ?, ?, ?)' ); @@ -449,13 +534,20 @@ function updateMetadata( $date ) { fail( 'Unable to insert new bug into DB: ' . 
$stmt->error ); } success(); +} else if ($type == 'dep_changed') { + $reason = normalizeReason( getField( 'reason' ), getField( 'watch-reason' ) ); + if (saveDependencyChanges( $bug, $date, $reason, $mailString )) { + success(); + } + fail( 'Unable to parse dep_changed email' ); } else if ($type == 'changed') { $reason = normalizeReason( getField( 'reason' ), getField( 'watch-reason' ) ); - if (saveChanges( $bug, $date, $reason, $mailString ) == 0) { - saveDependencyChanges( $bug, $date, $reason, $mailString ); + $extracted = saveChanges( $bug, $date, $reason, $mailString, true ); + $comments = saveComments( $bug, $date, $reason, $mailString ); + if ($extracted == 0 && $comments == 0) { + fail( 'Unable to extract meaningful data from changed email' ); } - saveComments( $bug, $date, $reason, $mailString ); success(); } else { diff --git a/scraper/postfix.forward.symtarget b/scraper/postfix.forward.symtarget new file mode 100644 index 0000000..dac2229 --- /dev/null +++ b/scraper/postfix.forward.symtarget @@ -0,0 +1 @@ +"| $HOME/scraper/decrypt_mail.awk | $HOME/scraper/filter.php" diff --git a/scraper/pump.sh b/scraper/pump.sh new file mode 100755 index 0000000..9b0c081 --- /dev/null +++ b/scraper/pump.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +export SENDER=bugzilla-daemon@mozilla.org +cat $1 | ./decrypt_mail.awk | ./filter.php diff --git a/scraper/scraper.mysql.cnf.sample b/scraper/scraper.mysql.cnf.sample new file mode 100644 index 0000000..b849ca9 --- /dev/null +++ b/scraper/scraper.mysql.cnf.sample @@ -0,0 +1 @@ +mysql://root:password@localhost:3307/mysql diff --git a/scraper/src/main.rs b/scraper/src/main.rs new file mode 100644 index 0000000..d31e5e2 --- /dev/null +++ b/scraper/src/main.rs @@ -0,0 +1,606 @@ +extern crate mailparse; +#[macro_use] +extern crate mysql; +extern crate regex; +extern crate victoria_dom; + +use std::collections::hash_map::DefaultHasher; +use std::env; +use std::fs; +use std::fs::File; +use std::hash::Hasher; +use std::io::{Read, 
stdin, Write}; +use std::process; +use std::time::{SystemTime, UNIX_EPOCH}; + +use mailparse::{dateparse, MailHeaderMap, MailParseError, ParsedMail}; + +use regex::Regex; + +use victoria_dom::DOM; + +fn save_file(data: &[u8]) -> String { + let time = SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0); + let mut hasher = DefaultHasher::new(); + hasher.write(data); + let uniq_name = format!("{}.{:x}", time, hasher.finish()); + { + let mut file = File::create(&uniq_name).unwrap(); + file.write(data).ok(); + } + uniq_name +} + +fn fail(data_file: &str, msg: &str, err: String) -> ! { + let err_name = format!("{}.err", data_file); + { + let mut file = File::create(err_name).unwrap(); + writeln!(file, "{}", msg).ok(); + writeln!(file, "{:?}", err).ok(); + } + process::exit(0); +} + +fn get_db() -> Result { + let mut mycnf = match env::home_dir() { + Some(path) => path, + None => return Err("Home dir not found".to_string()), + }; + mycnf.push(".bugmash"); + mycnf.push("scraper.mysql.cnf"); + let mut file = File::open(mycnf).map_err(|e| format!("{:?}", e))?; + let mut connstr = String::new(); + let _ = file.read_to_string(&mut connstr).map_err(|e| format!("{:?}", e))?; + mysql::Pool::new(connstr).map_err(|e| format!("{:?}", e)) +} + +fn get_body_with_type(mail: &ParsedMail, body_type: &'static str) -> Result, MailParseError> { + if mail.ctype.mimetype == body_type { + return mail.get_body().map(Some); + } + for subpart in &mail.subparts { + if let Some(x) = get_body_with_type(subpart, body_type)? 
{ + return Ok(Some(x)); + } + } + Ok(None) +} + +fn get_plain_body(mail: &ParsedMail) -> Result, MailParseError> { + get_body_with_type(mail, "text/plain") +} + +fn get_html_body(mail: &ParsedMail) -> Result, MailParseError> { + get_body_with_type(mail, "text/html") +} + +fn url_parts(footer: String) -> Option<(String, String, Option)> { + let prefix = "https://github.com/"; + let types = ["/issues/", "/pull/", "/commit/"]; + let hash = "#"; + let slash = "/"; + + let repo_ix = footer.find(prefix)? + prefix.len(); + let mut issues_ix = None; + let mut issues_len = None; + for t in types.iter() { + match footer[repo_ix..].find(t) { + Some(x) => { + issues_ix = Some(x); + issues_len = Some(t.len()); + break; + } + None => continue, + } + } + let issues_ix = issues_ix? + repo_ix; + let issues_len = issues_len.unwrap(); + let issuenum_ix = issues_ix + issues_len; + let end_ix = footer[repo_ix..].find("\n").map(|ix| ix + repo_ix).unwrap_or(footer.len()); + let hash_ix = footer[issuenum_ix..end_ix].find(hash).or_else(|| footer[issuenum_ix..end_ix].find(slash)).map(|ix| ix + issuenum_ix); + let hash = match hash_ix { + Some(ix) => Some(String::from(footer[ix..end_ix].trim())), + None => None, + }; + + return Some((String::from(&footer[repo_ix..issues_ix]), + String::from(footer[issuenum_ix..hash_ix.unwrap_or(end_ix)].trim()), + hash)); +} + +fn split_footer(msg: &str) -> (String, Option) { + // Avoid trying to match newlines directly since they can be either \r\n or \n + let ix = msg.rfind("You are receiving this because") + .and_then(|ix| msg[0..ix].rfind("-- ")); + match ix { + Some(ix) => (String::from(&msg[0..ix]), Some(String::from(&msg[ix..]))), + None => (String::from(msg), None), + } +} + +fn first_github_url(body: &str) -> Option<(String, String, Option)> { + let prefix = "https://github.com/"; + let ix = body.find(prefix)? 
+ prefix.len(); + let end_ix = ix + body[ix..].find(char::is_whitespace)?; + let urlpath = &body[ix..end_ix]; + let slash = "/"; + let org_end_ix = urlpath.find(slash)?; + let repo_end_ix = org_end_ix + urlpath[org_end_ix..].find(slash)?; + Some((String::from(&urlpath[0..repo_end_ix]), String::from(&urlpath[repo_end_ix + 1..]), None)) +} + +fn scrape_github_mail(mail: &ParsedMail) -> Result<(), String> { + let mut title = mail.headers.get_first_value("Subject").map_err(|e| format!("{:?}", e))?.unwrap_or("(no subject)".to_string()); + title = title.trim_start_matches("Re: ").to_string(); + if title.starts_with("[") { + if let Some(close_bracket) = title.find("]") { + title = title[close_bracket + 1..].trim().to_string(); + } + } + if title.ends_with(")") { + if let Some(issue_start) = title.rfind("(#") { + title = title[0..issue_start].trim().to_string(); + } + } + + let sender = mail.headers.get_first_value("X-GitHub-Sender").map_err(|e| format!("{:?}", e))?.unwrap_or("Unknown".to_string()); + let reason = match mail.headers.get_first_value("X-GitHub-Reason").map_err(|e| format!("{:?}", e))? { + Some(ref s) if s == "review_requested" => "review", + Some(ref s) if s == "ci_activity" => "review", + Some(ref s) if s == "author" => "Reporter", + Some(ref s) if s == "mention" => "CC", + _ => "Watch", + }; + let stamp = mail.headers.get_first_value("Date").map_err(|e| format!("{:?}", e))?.map(|v| dateparse(&v).unwrap_or(0)).unwrap_or(0); + let plain_body = match get_plain_body(mail).map_err(|e| format!("{:?}", e))? 
{ + Some(x) => x, + None => return Err("No plaintext body found".to_string()), + }; + let (comment, footer) = split_footer(&plain_body); + let footer = footer.ok_or("Unable to find footer".to_string())?; + let (repo, issue, hash) = url_parts(footer) + .or_else(|| first_github_url(&plain_body)) + .ok_or("Unable to extract URL parts".to_string())?; + let hash = hash.unwrap_or(String::from("")); + + let id = format!("{}#{}", repo, issue); + let db = get_db()?; + db.prep_exec(r#"INSERT INTO metadata (bug, stamp, title, secure, note) + VALUES (:id, FROM_UNIXTIME(:stamp), :title, 0, "") + ON DUPLICATE KEY UPDATE stamp=VALUES(stamp), title=VALUES(title)"#, params! { + id, + stamp, + title, + }).map_err(|e| format!("{:?}", e))?; + + let result = db.prep_exec(r#"INSERT INTO gh_issues (repo, issue, stamp, reason, hash, author, comment) + VALUES (:repo, :issue, FROM_UNIXTIME(:stamp), :reason, :hash, :sender, :comment)"#, params! { + repo, + issue, + stamp, + reason, + hash, + sender, + comment, + }).map_err(|e| format!("{:?}", e))?; + if result.affected_rows() != 1 { + return Err(format!("Affected row count for gh_issues was {}, not 1", result.affected_rows())); + } + + Ok(()) +} + +fn mail_header(mail: &ParsedMail, header: &str) -> Result, String> { + mail.headers.get_first_value(header).map_err(|e| format!("Unable to read mail header: {:?}", e)) +} + +fn bugmail_header(mail: &ParsedMail, header: &str) -> Result, String> { + mail_header(mail, &format!("X-Bugzilla-{}", header)) +} + +fn bugzilla_normalized_reason(mail: &ParsedMail) -> Result { + let reason = bugmail_header(mail, "Reason")?.ok_or("Unable to find Reason header")?; + let watch_reason = bugmail_header(mail, "Watch-Reason")?; + if reason.find("AssignedTo").is_some() { + Ok("AssignedTo".to_string()) + } else if reason.find("Reporter").is_some() { + Ok("Reporter".to_string()) + } else if reason.find("CC").is_some() { + Ok("CC".to_string()) + } else if reason.find("Voter").is_some() { + Ok("Voter".to_string()) + } 
else if reason.find("None").is_some() { + if watch_reason.is_some() { + Ok("Watch".to_string()) + } else { + Err(format!("Empty watch reason with reason {}", reason)) + } + } else { + Err(format!("Unrecognized reason {}", reason)) + } +} + +fn insert_changes(db: &mysql::Pool, id: &str, stamp: i64, reason: &str, changes: &[(String, String, String)]) -> Result<(), String> { + let mut stmt = db.prepare(r#"INSERT INTO changes (bug, stamp, reason, field, oldval, newval) + VALUES (:id, FROM_UNIXTIME(:stamp), :reason, :field, :oldval, :newval)"#).map_err(|e| format!("{:?}", e))?; + for (field, oldval, newval) in changes { + stmt.execute(params!{ + id, + stamp, + reason, + field, + oldval, + newval + }).map_err(|e| format!("{:?}", e))?; + } + Ok(()) +} + +fn insert_comments(db: &mysql::Pool, id: &str, stamp: i64, reason: &str, comments: &[(i32, String, String)]) -> Result<(), String> { + let mut stmt = db.prepare(r#"INSERT INTO comments (bug, stamp, reason, commentnum, author, comment) + VALUES (:id, FROM_UNIXTIME(:stamp), :reason, :commentnum, :author, :comment)"#).map_err(|e| format!("{:?}", e))?; + for (commentnum, author, comment) in comments { + stmt.execute(params!{ + id, + stamp, + reason, + commentnum, + author, + comment + }).map_err(|e| format!("{:?}", e))?; + } + Ok(()) +} + +fn bugmail_body(mail: &ParsedMail) -> Result { + let body = match get_html_body(mail).map_err(|e| format!("{:?}", e))? { + Some(x) => x, + None => return Err("No html body found".to_string()), + }; + Ok(DOM::new(&body)) +} + +fn bugmail_body_text(mail: &ParsedMail) -> Result { + get_plain_body(mail) + .map_err(|e| format!("{:?}", e))? + .map(|t| t.replace("\r", "")) + .ok_or("No plaintext body found".to_string()) +} + +fn scrape_bugmail_request(mail: &ParsedMail, db: &mysql::Pool, id: &str, stamp: i64) -> Result<(), String> { + let requestee = bugmail_header(mail, "Flag-Requestee")?; + let flag = mail_header(mail, "Subject")? 
+ .and_then(|s| s.split_ascii_whitespace().next().map(str::to_string)) + .ok_or("Got a request type bugmail with no subject, not implemented yet".to_string())?; + if requestee.is_none() { + let action = mail_header(mail, "Subject")? + .and_then(|s| s.split_ascii_whitespace().nth(1).map(str::to_string)) + .ok_or("Got a request type bugmail with no requestee and unexpected subject".to_string())?; + if action.starts_with("canceled") { + db.prep_exec(r#"UPDATE requests SET cancelled=1 WHERE bug=:id and attachment=:attachment AND flag=:flag"#, params! { + id, + "attachment" => 0, + flag + }).map_err(|e| format!("{:?}", e))?; + return Ok(()); + } + if action.starts_with("granted:") { + let author_email = bugmail_header(mail, "Who")?; + db.prep_exec(r#"INSERT INTO reviews (bug, stamp, attachment, title, flag, author, authoremail, granted, comment) + VALUES (:id, FROM_UNIXTIME(:stamp), :attachment, :title, :flag, :author, :author_email, :granted, :comment)"#, params! { + id, + stamp, + "attachment" => 0, + "title" => "", + flag, + "author" => "", + author_email, + "granted" => 1, + "comment" => "", + }).map_err(|e| format!("{:?}", e))?; + return Ok(()); + } + if action == "not" { + let author_email = bugmail_header(mail, "Who")?; + db.prep_exec(r#"INSERT INTO reviews (bug, stamp, attachment, title, flag, author, authoremail, granted, comment) + VALUES (:id, FROM_UNIXTIME(:stamp), :attachment, :title, :flag, :author, :author_email, :granted, :comment)"#, params! { + id, + stamp, + "attachment" => 0, + "title" => "", + flag, + "author" => "", + author_email, + "granted" => 0, + "comment" => "", + }).map_err(|e| format!("{:?}", e))?; + return Ok(()); + } + return Err("Got a request type bugmail with no requestee, not implemented yet".to_string()); + } + db.prep_exec(r#"INSERT INTO requests (bug, stamp, attachment, title, flag) + VALUES (:id, FROM_UNIXTIME(:stamp), :attachment, :title, :flag)"#, params! 
{ + id, + stamp, + "attachment" => 0, + "title" => "", + flag + }).map_err(|e| format!("{:?}", e))?; + Ok(()) +} + +fn scrape_bugmail_newbug(mail: &ParsedMail, db: &mysql::Pool, id: &str, stamp: i64) -> Result<(), String> { + let reason = bugzilla_normalized_reason(mail)?; + let dom = bugmail_body(mail)?; + let mut title = None; + let author = bugmail_header(mail, "Who")?.ok_or("Couldn't find new bug author".to_string())?; + + let body = bugmail_body_text(mail)?; + let body_re = Regex::new(r"(?s)Bug ID: .*?\n\n(.*\n\n)?-- \n").unwrap(); + let body_m = body_re.captures(&body).ok_or("Unable to capture new bug description".to_string())?; + let description = body_m.get(1).map_or("", |m| m.as_str().trim()); + + let mut tr_opt = dom.at("div.new table tr"); + while let Some(tr) = tr_opt { + let field = tr.at("td.c1") + .map(|d| d.text_all()) + .ok_or("Couldn't find new bug details".to_string())?; + if field.contains("Summary") { + title = tr.at("td.c2").map(|d| d.text_all()); + break; + } + tr_opt = tr.next(); + } + let title = title.ok_or("Couldn't find bug summary in new bug details table".to_string())?; + + db.prep_exec(r#"INSERT INTO newbugs (bug, stamp, reason, title, author, description) + VALUES (:id, FROM_UNIXTIME(:stamp), :reason, :title, :author, :description)"#, params! 
{ + id, + stamp, + "reason" => &reason, + title, + author, + description + }).map_err(|e| format!("{:?}", e))?; + + let changes = scrape_change_table(&dom, "")?; + insert_changes(db, id, stamp, &reason, &changes)?; + + Ok(()) +} + +fn scrape_change_table(dom: &DOM, field_prefix: &str) -> Result, String> { + let mut changes = Vec::new(); + let mut change_row = dom.at("div.diffs tr.head").and_then(|d| d.next()); + while let Some(tr) = change_row { + let field = tr.at("td.c1") + .map(|d| d.text_all()) + .ok_or("Couldn't find bug change table field".to_string())?; + let old = tr.at("td.c2") + .map(|d| d.text_all()) + .ok_or("Couldn't find bug change table old value".to_string())?; + let new = tr.at("td.c2") + .and_then(|d| d.next()) + .map(|d| d.text_all()) + .ok_or("Couldn't find bug change table new value".to_string())?; + changes.push((format!("{}{}", field_prefix, field), old, new)); + change_row = tr.next(); + } + Ok(changes) +} + +fn scrape_bugmail_depchange(mail: &ParsedMail, db: &mysql::Pool, id: &str, stamp: i64) -> Result<(), String> { + let reason = bugzilla_normalized_reason(mail)?; + let dom = bugmail_body(mail)?; + let depbug_text = dom.at("body > b").map(|d| d.text_all()).ok_or("Unable to find bug dependency sentence")?; + let depbug_re = Regex::new(r"(?i)bug (\d+) depends on bug(?: | )(\d+), which changed state.").unwrap(); + let depbug_m = depbug_re.captures(&depbug_text).ok_or("Dependency sentence didn't match expected regex".to_string())?; + if &depbug_m[1] != id { + return Err("Dependency sentence referred to wrong bug id".to_string()); + } + + let depbug = &depbug_m[2]; + let changes = scrape_change_table(&dom, &format!("depbug-{}-", depbug))?; + insert_changes(db, id, stamp, &reason, &changes) +} + +fn scrape_comments(mail: &ParsedMail) -> Result, String> { + let mut comment_tuples = Vec::new(); + let body = bugmail_body_text(mail)?; + let body_re = Regex::new(r"(?sU)-- Comment #(\d+) from ([^\n]*) ---\n(.*)\n\n--").unwrap(); + for capture in 
body_re.captures_iter(&body) { + let comment_num = &capture[1]; + let mut author = String::from(&capture[2]); + let comment_text = &capture[3]; + let stripped_len = author.len() - " YYYY-mm-dd HH:ii:ss ZZZ".len(); + author.truncate(stripped_len); + if let Some(idx) = author.find('<') { + author.truncate(idx); + } + comment_tuples.push((comment_num.parse::().map_err(|e| format!("{:?}", e))?, author, comment_text.to_string())); + } + Ok(comment_tuples) +} + +fn scrape_bugmail_change(mail: &ParsedMail, db: &mysql::Pool, id: &str, stamp: i64) -> Result<(), String> { + let reason = bugzilla_normalized_reason(mail)?; + let dom = bugmail_body(mail)?; + + let changes = scrape_change_table(&dom, "")?; + insert_changes(db, id, stamp, &reason, &changes)?; + + let comments = scrape_comments(mail)?; + insert_comments(db, id, stamp, &reason, &comments)?; + + if changes.len() == 0 && comments.len() == 0 { + return Err("Unable to extract meaningful data from changed email".to_string()); + } + Ok(()) +} + +fn scrape_bugzilla_mail(bz_type: &str, mail: &ParsedMail) -> Result<(), String> { + if bz_type == "nag" { + return Ok(()); + } + + let secure = bugmail_header(mail, "Secure-Email")?.is_some(); + let id = bugmail_header(mail, "ID")?.ok_or("Unable to find bug id".to_string())?; + let stamp = mail_header(mail, "Date")?.map(|v| dateparse(&v).unwrap_or(0)).unwrap_or(0); + + let subject = mail_header(mail, "Subject")?.ok_or("Unable to find subject header".to_string())?; + let subject_re = Regex::new(r"(?sU)\[Bug \d+\] (.*)( : \[Attachment.*)?$").unwrap(); + let subject_m = subject_re.captures(&subject).ok_or("Subject header didn't match expected regex".to_string())?; + let title = &subject_m[1]; + + let db = get_db()?; + db.prep_exec(r#"INSERT INTO metadata (bug, stamp, title, secure, note) + VALUES (:id, FROM_UNIXTIME(:stamp), :title, :secure, "") + ON DUPLICATE KEY UPDATE stamp=VALUES(stamp), title=VALUES(title), secure=VALUES(secure)"#, params! 
{ + "id" => &id, + stamp, + title, + secure, + }).map_err(|e| format!("{:?}", e))?; + + if bz_type == "request" { + scrape_bugmail_request(mail, &db, &id, stamp) + } else if secure { + // you haven't set a PGP/GPG key and this is for a secure bug, so there's no data in it. + let reason = bugzilla_normalized_reason(mail)?; + let unknown = "Unknown"; + let changes = vec![ (unknown.to_string(), unknown.to_string(), unknown.to_string()) ]; + insert_changes(&db, &id, stamp, &reason, &changes)?; + Ok(()) + } else if bz_type == "new" { + scrape_bugmail_newbug(mail, &db, &id, stamp) + } else if bz_type == "dep_changed" { + scrape_bugmail_depchange(mail, &db, &id, stamp) + } else if bz_type == "changed" { + scrape_bugmail_change(mail, &db, &id, stamp) + } else { + Err(format!("Unknown bugmail type {}", bz_type)) + } +} + +fn scrape_phabricator_mail(mail: &ParsedMail) -> Result<(), String> { + let stamp = mail_header(mail, "Date")?.map(|v| dateparse(&v).unwrap_or(0)).unwrap_or(0); + + let stamps = mail_header(mail, "X-Phabricator-Stamps")?.ok_or("Unable to get stamps header".to_string())?; + let actor_re = Regex::new(r"actor\((.*?)\)").unwrap(); + let actor_m = actor_re.captures(&stamps).ok_or("Stamps header didn't match actor regex".to_string())?; + let actor = &actor_m[1]; + + let reason = match &stamps { + s if s.contains("reviewer(@kats)") => "review", + s if s.contains("author(@kats)") => "Reporter", + _ => "CC", + }; + + let mut plain_body = match get_plain_body(mail).map_err(|e| format!("{:?}", e))? 
{ + Some(x) => x, + None => return Err("No plaintext body found".to_string()), + }; + if let Some(ix) = plain_body.find("\nREVISION DETAIL") { + plain_body = plain_body[0..ix].to_string(); + } + + let subject = mail_header(mail, "Subject")?.ok_or("Unable to find subject header".to_string())?; + + let mut bugzilla = false; + let (phab, title) = if stamps.contains("application(Diffusion)") { + let subject_re = Regex::new(r"Diffusion[^:]*:(.*)").unwrap(); + let subject_m = subject_re.captures(&subject).ok_or("Subject header didn't match diffusion regex".to_string())?; + let diff_re = Regex::new(r"Differential Revision: https://phabricator.services.mozilla.com/(D\d+)").unwrap(); + if let Some(diff_m) = diff_re.captures(&plain_body) { + (diff_m[1].to_string(), subject_m[1].to_string()) + } else { + let bug_re = Regex::new(r"(?i)bug (\d+)").unwrap(); + let bug_m = bug_re.captures(&subject_m[1]).ok_or("Unrecognized diffusion email type".to_string())?; + bugzilla = true; + if let Some(ix) = plain_body.find("\nBRANCHES") { + plain_body = plain_body[0..ix].to_string(); + } + (bug_m[1].to_string(), subject_m[1].to_string()) + } + } else { + let subject_re = Regex::new(r"Differential.* (D\d+): (.*)").unwrap(); + let subject_m = subject_re.captures(&subject).ok_or("Subject header didn't match differential regex".to_string())?; + (subject_m[1].to_string(), subject_m[2].to_string()) + }; + + let db = get_db()?; + if bugzilla { + // Don't update anything on duplicate key, leave bugmail as source of truth + db.prep_exec(r#"INSERT IGNORE INTO metadata (bug, stamp, title, secure, note) + VALUES (:id, FROM_UNIXTIME(:stamp), :title, :secure, "")"#, params! { + "id" => &phab, + stamp, + "title" => &title, + "secure" => false, + }).map_err(|e| format!("{:?}", e))?; + let result = db.prep_exec(r#"INSERT INTO comments (bug, stamp, reason, commentnum, author, comment) + VALUES (:id, FROM_UNIXTIME(:stamp), :reason, 0, :actor, :comment)"#, params! 
{ + "id" => &phab, + stamp, + reason, + actor, + "comment" => &plain_body, + }).map_err(|e| format!("{:?}", e))?; + if result.affected_rows() != 1 { + return Err(format!("Affected row count for comments was {}, not 1", result.affected_rows())); + } + } else { + db.prep_exec(r#"INSERT INTO metadata (bug, stamp, title, secure, note) + VALUES (:id, FROM_UNIXTIME(:stamp), :title, :secure, "") + ON DUPLICATE KEY UPDATE stamp=VALUES(stamp), title=VALUES(title), secure=VALUES(secure)"#, params! { + "id" => &phab, + stamp, + "title" => &title, + "secure" => false, + }).map_err(|e| format!("{:?}", e))?; + let result = db.prep_exec(r#"INSERT INTO phab_diffs (revision, stamp, reason, author, comment) + VALUES (:revision, FROM_UNIXTIME(:stamp), :reason, :actor, :comment)"#, params! { + "revision" => &phab, + stamp, + reason, + actor, + "comment" => &plain_body, + }).map_err(|e| format!("{:?}", e))?; + if result.affected_rows() != 1 { + return Err(format!("Affected row count for phab_diffs was {}, not 1", result.affected_rows())); + } + } + + Ok(()) +} + +fn main() { + let mut input = Vec::new(); + { + let stdin = stdin(); + let mut handle = stdin.lock(); + let len = handle.read_to_end(&mut input); + len.err().map(|e| fail("stdin-read", "Reading stdin failed", format!("{:?}", e))); + } + + let saved_file = save_file(&input); + let mail = mailparse::parse_mail(&input).unwrap_or_else(|e| fail(&saved_file, "Unable to parse mail", format!("{:?}", e))); + + let github_reason = mail.headers.get_first_value("X-GitHub-Reason").unwrap_or_else(|e| fail(&saved_file, "Unable to read mail header", format!("{:?}", e))); + if github_reason.is_some() { + scrape_github_mail(&mail).unwrap_or_else(|e| fail(&saved_file, "Error while scraping github mail", e)); + fs::remove_file(&saved_file).unwrap_or_else(|e| fail(&saved_file, "Error removing file after processing", format!("{:?}", e))); + } + + let bugzilla_type = mail.headers.get_first_value("X-Bugzilla-Type").unwrap_or_else(|e| 
fail(&saved_file, "Unable to read mail header", format!("{:?}", e))); + if let Some(bz_type) = bugzilla_type { + scrape_bugzilla_mail(&bz_type, &mail).unwrap_or_else(|e| fail(&saved_file, "Error while scraping bugzilla mail", e)); + fs::remove_file(&saved_file).unwrap_or_else(|e| fail(&saved_file, "Error removing file after processing", format!("{:?}", e))); + } + + let phabricator = mail.headers.get_first_value("X-Phabricator-Sent-This-Message").unwrap_or_else(|e| fail(&saved_file, "Unable to read mail header", format!("{:?}", e))); + if phabricator.is_some() { + scrape_phabricator_mail(&mail).unwrap_or_else(|e| fail(&saved_file, "Error while scraping phab mail", e)); + fs::remove_file(&saved_file).unwrap_or_else(|e| fail(&saved_file, "Error removing file after processing", format!("{:?}", e))); + } +} diff --git a/www/common.php b/www/common.php new file mode 100644 index 0000000..6117865 --- /dev/null +++ b/www/common.php @@ -0,0 +1,78 @@ +prepare( 'DELETE FROM tags WHERE bug=?' ); + if ($_DB->errno) fail( 'Error preparing tag deletion: ' . $_DB->error ); + foreach ($newTags AS $bug => $tagList) { + $stmt->bind_param( 's', $bug ); + $stmt->execute(); + if ($stmt->errno) fail( 'Error inserting to metadata: ' . $stmt->error ); + } + $stmt->close(); + + $stmt = $_DB->prepare( 'INSERT INTO tags (bug, tag) VALUES (?, ?)' ); + if ($_DB->errno) fail( 'Error preparing tag insertion: ' . $_DB->error ); + foreach ($newTags AS $bug => $tagList) { + foreach (explode( ',', $tagList ) AS $tag) { + $tag = trim( $tag ); + if (strlen( $tag ) > 0) { + $stmt->bind_param( 'ss', $bug, $tag ); + $stmt->execute(); + if ($stmt->errno) fail( 'Error inserting to metadata: ' . 
$stmt->error ); + } + } + } + $stmt->close(); +} + +function escapeHTML( $stuff ) { + $stuff = str_replace( '&', '&', $stuff ); + $stuff = str_replace( array( '<', '>', '"' ), array( '<', '>', '"' ), $stuff ); + return $stuff; +} + +function isGithubIssue( $bugid ) { + return (strpos( $bugid, '#' ) !== FALSE); +} + +function isGithubCommit( $bugid ) { + return (strlen( $bugid ) - strpos( $bugid, '#' )) >= 40; +} + +function isPhabDiff( $bugid ) { + return (strval($bugid)[0] == 'D'); +} + +function makeBugLink( $bugid ) { + global $_BASE_URL, $_GH_BASE_URL, $_PHAB_BASE_URL; + + if (isGithubIssue( $bugid )) { + $type = isGithubCommit( $bugid ) ? '/commit/' : '/issues/'; + return sprintf( '%s', + $_GH_BASE_URL . str_replace( '#', $type, $bugid ), + $bugid ); + } else if (isPhabDiff( $bugid )) { + return sprintf( '%s', + $_PHAB_BASE_URL, $bugid, $bugid ); + } else { + return sprintf( '%s', + $_BASE_URL . '/show_bug.cgi?id=' . $bugid, + "Bug " . $bugid ); + } +} + +?> diff --git a/common.php b/www/common.php.bk similarity index 60% rename from common.php rename to www/common.php.bk index 16afb7d..c19551c 100644 --- a/common.php +++ b/www/common.php.bk @@ -13,13 +13,15 @@ } function fail( $message ) { + error_log( $message ); header( 'HTTP/500 Error!' ); print $message; exit( 0 ); } -$BUGMASH_DIR = $_SERVER['DOCUMENT_ROOT'] . '/../mailfilters/' . $_SERVER['SERVER_NAME'] . '/bugmash'; -include_once( $BUGMASH_DIR . '/bugmash.config.php' ); +$BUGMASH_DIR = $_SERVER['DOCUMENT_ROOT'] . '/../scraper'; +include_once( $BUGMASH_DIR . '/config.php' ); +$_GH_BASE_URL = "https://github.com/"; function updateTags( $newTags ) { global $_DB; @@ -27,8 +29,9 @@ function updateTags( $newTags ) { $stmt = $_DB->prepare( 'DELETE FROM tags WHERE bug=?' ); if ($_DB->errno) fail( 'Error preparing tag deletion: ' . 
$_DB->error ); foreach ($newTags AS $bug => $tagList) { - $stmt->bind_param( 'i', $bug ); + $stmt->bind_param( 's', $bug ); $stmt->execute(); + if ($stmt->errno) fail( 'Error inserting to metadata: ' . $stmt->error ); } $stmt->close(); @@ -38,8 +41,9 @@ function updateTags( $newTags ) { foreach (explode( ',', $tagList ) AS $tag) { $tag = trim( $tag ); if (strlen( $tag ) > 0) { - $stmt->bind_param( 'is', $bug, $tag ); + $stmt->bind_param( 'ss', $bug, $tag ); $stmt->execute(); + if ($stmt->errno) fail( 'Error inserting to metadata: ' . $stmt->error ); } } } @@ -52,4 +56,18 @@ function escapeHTML( $stuff ) { return $stuff; } +//function bugLink( $bugid ) { +// global $_BASE_URL, $_GH_BASE_URL; +// +// if (strpos( $bugid, '#' ) !== FALSE) { +// return sprintf( '%s', +// $_GH_BASE_URL . str_replace( '#', '/issues/', $bugid ), +// $bugid); +// } else { +// return sprintf( '%s', +// $_BASE_URL . '/show_bug.cgi?id=' . $bugid, +// "Bug " . $bugid); +// } +//} + ?> diff --git a/dashboard.php b/www/dashboard.php similarity index 57% rename from dashboard.php rename to www/dashboard.php index 4e60abb..b9bc9c1 100644 --- a/dashboard.php +++ b/www/dashboard.php @@ -2,6 +2,8 @@ include_once( 'common.php' ); +date_default_timezone_set( 'UTC' ); + $_DB = new mysqli( $_MYSQL_HOST, $_MYSQL_USER, $_MYSQL_PASS, $_MYSQL_DB ); if (mysqli_connect_errno()) { fail( 'Error connecting to db: ' . mysqli_connect_error() ); @@ -11,12 +13,13 @@ // handle note and tag updates // -$stmt = $_DB->prepare( 'INSERT INTO metadata (bug, note) VALUES (?, ?) ON DUPLICATE KEY UPDATE note=VALUES(note)' ); +$stmt = $_DB->prepare( 'INSERT INTO metadata (bug, note, stamp) VALUES (?, ?, NOW()) ON DUPLICATE KEY UPDATE note=VALUES(note)' ); if ($_DB->errno) fail( 'Error preparing metadata insert: ' . 
$_DB->error ); foreach ($_POST AS $key => $value) { if (strncmp( $key, 'note', 4 ) == 0) { - $stmt->bind_param( 'is', intval( substr( $key, 4 ) ), trim( $value ) ); + $stmt->bind_param( 'ss', substr( $key, 4 ), trim( $value ) ); $stmt->execute(); + if ($stmt->errno) fail( 'Error inserting to metadata: ' . $stmt->error ); } } $stmt->close(); @@ -24,7 +27,7 @@ $tagUpdates = array(); foreach ($_POST AS $key => $value) { if (strncmp( $key, 'tags', 4 ) == 0) { - $tagUpdates[ intval( substr( $key, 4 ) ) ] = $value; + $tagUpdates[ substr( $key, 4 ) ] = $value; } } updateTags( $tagUpdates ); @@ -96,12 +99,38 @@ function stripWhitespace( $stuff ) { return preg_replace( '/\s/', '', $stuff ); } +function buglink( $prefix, $bug ) { + global $_BASE_URL, $meta_titles; + return '' . $prefix . $bug . ''; +} + function linkify( $text, $bug ) { global $_BASE_URL; $text = preg_replace( '#(https?://\S+)#i', '$1', $text ); - $text = preg_replace( '/(bug\s+)(\d+)/i', '$1$2', $text ); - $text = preg_replace( '/(bug-)(\d+)/i', '$1$2', $text ); - $text = preg_replace( '/(Attachment #?)(\d+)/i', '$1$2', $text ); + $text = preg_replace_callback( '/(bug\s+)(\d+)/i', function($m) { return buglink($m[1], $m[2]); }, $text ); + $text = preg_replace_callback( '/(bug-)(\d+)/i', function($m) { return buglink($m[1], $m[2]); }, $text ); + $text = preg_replace( '/(Attachment #?)(\d+)/i', '$1$2', $text ); + return $text; +} + +function linkify_gh( $text, $base_repo ) { + global $_GH_BASE_URL; + $text = preg_replace( '#\[(.*?)\]\((https?://.*?)\)#i', '$1', $text ); // markdown links + $text = preg_replace( '@(\W)(\w+/\w+)#(\d+)(\W)@', '$1$2#$3$4', $text ); + $text = preg_replace( '@(\W)#(\d+)(\W)@', '$1#$2$3', $text ); + return $text; +} + +function linkify_phab( $text ) { + $text = preg_replace( '#(https?://\S+)#i', '$1', $text ); + return $text; +} + +function buglinkify( $field, $text ) { + global $_BASE_URL; + if ($field === 'Depends on' || $field === 'Blocks' || $field === 'Regressions' || $field 
=== 'Regressed by') { + $text = preg_replace_callback( '/(\d+)/', function($m) { return buglink('', $m[1]); }, $text ); + } return $text; } @@ -125,6 +154,19 @@ function column( &$reasons ) { } } +function initEmpty( &$blocks, $bug, $stamp ) { + if (!isset( $blocks[ $bug ][ $stamp ])) { + $blocks[ $bug ][ $stamp ] = ''; + } +} + +function safeGet( &$array, $index ) { + if (isset( $array[ $index ] )) { + return $array[ $index ]; + } + return ''; +} + $filterComments = array(); $filterFlags = array(); $numRows = 0; @@ -135,29 +177,44 @@ function column( &$reasons ) { while ($row = $result->fetch_assoc()) { $numRows++; $stamp = strtotime( $row['stamp'] ); - $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s: %s%s %s%s
', - $row['id'], - escapeHTML( $row['author'] ), - abbrevFlag( $row['flag'] ), - ($row['granted'] ? '+' : '-'), - $_BASE_URL, - $row['bug'], - $row['attachment'], - escapeHTML( $row['title'] ), - (strlen( $row['comment'] ) > 0 ? ' with comments: ' . escapeHTML( $row['comment'] ) : '') ) . "\n"; + initEmpty( $bblocks, $row['bug'], $stamp ); + if (strstr( $row['title'], 'MozReview Request:' )) { + $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s: %s%s %s%s
', + $row['id'], + escapeHTML( $row['author'] ), + abbrevFlag( $row['flag'] ), + ($row['granted'] ? '+' : '-'), + $_BASE_URL, + $row['attachment'], + escapeHTML( $row['title'] ), + (strlen( $row['comment'] ) > 0 ? ' with comments: ' . escapeHTML( $row['comment'] ) : '') ) . "\n"; + } else { + $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s: %s%s %s%s
', + $row['id'], + escapeHTML( $row['author'] ), + abbrevFlag( $row['flag'] ), + ($row['granted'] ? '+' : '-'), + $_BASE_URL, + $row['bug'], + $row['attachment'], + escapeHTML( $row['title'] ), + (strlen( $row['comment'] ) > 0 ? ' with comments: ' . escapeHTML( $row['comment'] ) : '') ) . "\n"; + } $reasons[ $row['bug'] ][] = 'review'; $filterComments[ $row['attachment'] ][] = $row['comment']; - $filterFlags[ $row['attachment'] ][] = array( "{$row['flag']}?({$row['authoremail']})", "{$type}" . ($row['granted'] ? '+' : '-') ); + $filterFlags[ $row['attachment'] ][] = array( "{$row['flag']}?({$row['authoremail']})", "{$row['flag']}" . ($row['granted'] ? '+' : '-') ); } $result = loadTable( 'requests' ); while ($row = $result->fetch_assoc()) { $numRows++; $stamp = strtotime( $row['stamp'] ); - $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%sr? %s%s
', + initEmpty( $bblocks, $row['bug'], $stamp ); + $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s%s? %s%s
', $row['id'], ($row['cancelled'] ? '' : ''), + abbrevFlag( $row['flag'] ), $_BASE_URL, $row['bug'], $row['attachment'], @@ -165,13 +222,16 @@ function column( &$reasons ) { ($row['cancelled'] ? '' : '') ) . "\n"; $reasons[ $row['bug'] ][] = 'request'; - $filterFlags[ $row['attachment'] ][] = array( '', "{$row['flag']}?({$_ME})" ); + foreach ($_ME as $myEmail) { + $filterFlags[ $row['attachment'] ][] = array( '', "{$row['flag']}?({$myEmail})" ); + } } $result = loadTable( 'newbugs' ); while ($row = $result->fetch_assoc()) { $numRows++; $stamp = strtotime( $row['stamp'] ); + initEmpty( $bblocks, $row['bug'], $stamp ); $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
New: %s by %s
%s
', $row['id'], $_BASE_URL, @@ -202,12 +262,13 @@ function column( &$reasons ) { $numRows++; $stamp = strtotime( $row['stamp'] ); + initEmpty( $bblocks, $row['bug'], $stamp ); $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s: %s → %s
', ($hide ? ' style="display: none"' : ''), $row['id'], linkify( escapeHTML( $row['field'] ), $row['bug'] ), - escapeHTML( $row['oldval'] ), - escapeHTML( $row['newval'] ) ) . "\n"; + buglinkify( $row['field'], escapeHTML( $row['oldval'] ) ), + buglinkify( $row['field'], escapeHTML( $row['newval'] ) ) ) . "\n"; $reasons[ $row['bug'] ][] = $row['reason']; } @@ -221,7 +282,8 @@ function column( &$reasons ) { if (isset( $filterComments[ $matches[1] ] )) { foreach ($filterComments[ $matches[1] ] AS $filterComment) { // strip whitespace before comparison because sometimes the emails are formatted differently. stupid bugzilla - if (strpos( stripWhitespace( $row['comment'] ), stripWhitespace( $filterComment ) ) !== FALSE) { + $strippedComment = stripWhitespace( $filterComment ); + if (strlen( $strippedComment ) > 0 && strpos( stripWhitespace( $row['comment'] ), $strippedComment ) !== FALSE) { $hide = true; break; } @@ -232,7 +294,10 @@ function column( &$reasons ) { $numRows++; $stamp = strtotime( $row['stamp'] ); - $isTbplRobot = ($row['author'] == 'TinderboxPushlog Robot'); + $isTbplRobot = (stripWhitespace( $row['author'] ) == 'TBPLRobot') + || (stripWhitespace( $row['author'] ) == 'TreeherderRobot') + || (stripWhitespace( $row['author'] ) == 'IntermittentFailuresRobot'); + initEmpty( $bblocks, $row['bug'], $stamp ); $bblocks[ $row['bug'] ][ $stamp ] .= sprintf( '
%s said:
%s
', ($hide ? 'display: none;' : 'white-space: pre-line;'), ($isTbplRobot ? 'opacity: 0.5;' : ''), @@ -245,34 +310,86 @@ function column( &$reasons ) { $reasons[ $row['bug'] ][] = $row['reason']; } +$result = loadTable( 'gh_issues' ); +while ($row = $result->fetch_assoc()) { + $numRows++; + $stamp = strtotime( $row['stamp'] ); + $bugid = $row['repo'] . '#' . $row['issue']; + initEmpty( $bblocks, $bugid, $stamp ); + if ($row['hash'] == "") { + $bblocks[ $bugid ][ $stamp ] .= sprintf( '
New: %s by %s
%s
', + $row['id'], + $_GH_BASE_URL, + $row['repo'], + strlen( $row['issue'] ) == 40 ? 'commit' : 'issues', + $row['issue'], + escapeHTML( safeGet( $meta_titles, $bugid ) ), + escapeHTML( $row['author'] ), + linkify_gh( escapeHTML( $row['comment'] ), $row['repo'] ) ) . "\n"; + } else { + $bblocks[ $bugid ][ $stamp ] .= sprintf( '
%s said:
%s
', + $row['id'], + escapeHTML( $row['author'] ), + $_GH_BASE_URL, + $row['repo'], + (strncmp( $row['hash'], '#', 1 ) == 0 ? 'issues' : 'pull'), + $row['issue'], + $row['hash'], + linkify_gh( escapeHTML( $row['comment'] ), $row['repo'] ) ) . "\n"; + } + $reasons[ $bugid ][] = $row['reason']; +} + +$result = loadTable( 'phab_diffs' ); +while ($row = $result->fetch_assoc()) { + $numRows++; + $stamp = strtotime( $row['stamp'] ); + $bugid = $row['revision']; + initEmpty( $bblocks, $bugid, $stamp ); + $bblocks[ $bugid ][ $stamp ] .= sprintf( '
%s said:
%s
', + $row['id'], + escapeHTML( $row['author'] ), + $_PHAB_BASE_URL, + $row['revision'], + linkify_phab( escapeHTML( $row['comment'] ) ) ) . "\n"; + $reasons[ $bugid ][] = $row['reason']; +} + foreach ($bblocks AS $bug => &$block) { ksort( $block, SORT_NUMERIC ); $touchTime = key( $block ); - $block = sprintf( '
' - . 'X' - . '%s' - . 'Bug %d %s' + $identifier = (isGithubIssue( $bug ) ? 'gh_' : (isPhabDiff( $bug ) ? 'pd_' : 'bug')) . $bug; + $block = sprintf( '
' + . '' + . '%s' + . '%s %s' . '
' . '
%s
' - . '' + . '' . '
', - ($meta_secure[ $bug ] ? 'secure ' : ''), - $bug, - (in_array($bug, $bugsWithNotes) ? escapeHTML( $meta_notes[ $bug ] . ' | ' . $meta_tags[ $bug ] ) : ''), + (empty( $meta_secure[ $bug ] ) ? '' : 'secure '), + $identifier, + (in_array($bug, $bugsWithNotes) ? escapeHTML( safeGet( $meta_notes, $bug ) . ' | ' . safeGet( $meta_tags, $bug ) ) : ''), $bug, (in_array($bug, $bugsWithNotes) ? 'U' : 'N'), - $_BASE_URL, - $bug, - $bug, - escapeHTML( $meta_titles[ $bug ] ), + makeBugLink( $bug ), + escapeHTML( safeGet( $meta_titles, $bug ) ), implode( "\n", $block ), - $bug ) . "\n"; - $columns[ column( $reasons[ $bug ] ) ][ $touchTime ] .= $block; + $identifier, + $identifier, + $identifier, + $identifier ) . "\n"; + $col = column( $reasons[ $bug ] ); + initEmpty( $columns, $col, $touchTime ); + $columns[ $col ][ $touchTime ] .= $block; } $_DB->close(); $errors = 0; -$files = scandir( $BUGMASH_DIR ); +$files = scandir( $_SERVER['DOCUMENT_ROOT'] . "/../../" ); foreach ($files AS $file) { if (strpos( strrev( $file ), "rre." ) === 0) { $errors++; @@ -336,7 +453,7 @@ function column( &$reasons ) { background-color: red; color: white; } -a.wipe { +a.wipe, a.wipetop { float: right; margin-left: 3px; vertical-align: top; @@ -401,13 +518,14 @@ function wipe(e) { } }, true ); - function addNote( bugnumber ) { + function addNote( bugid ) { var notediv = document.createElement( "div" ); notediv.className = "newnote"; var sibling = document.getElementById( "notebuttons" ); sibling.parentNode.insertBefore( notediv, sibling ); - notediv.innerHTML = 'Bug : '; - if (bugnumber) { + var prefix = bugid.includes( '#' ) ? 
'' : 'Bug '; + notediv.innerHTML = '' + prefix + ': '; + if (bugid) { notediv.getElementsByTagName( "input" )[1].focus(); } else { notediv.getElementsByTagName( "input" )[0].focus(); @@ -419,29 +537,39 @@ function setNoteNames() { while (newnotes.length > 0) { var newnote = newnotes[0]; var bugnumbertext = newnote.getElementsByTagName( "input" )[0].value; - var bugnumber = parseInt( bugnumbertext ); - if (isNaN( bugnumber )) { - if (window.confirm( "Unable to parse " + bugnumbertext + " as a bug number; replace with 0 and continue anyway?" )) { - bugnumber = 0; - } else { - return false; + var bugid; + if (bugnumbertext.includes("#")) { + // GH issue + bugid = bugnumbertext; + var anchor = document.createElement( "a" ); + anchor.setAttribute( "href", "" + bugnumbertext.replace( '#', '/issues' ) ); + anchor.textContent = bugnumbertext; + } else { + // bugzilla bug + bugid = parseInt( bugnumbertext ); + if (isNaN( bugid )) { + if (window.confirm( "Unable to parse " + bugnumbertext + " as a bug number; replace with 0 and continue anyway?" 
)) { + bugid = 0; + } else { + return false; + } } + var anchor = document.createElement( "a" ); + anchor.setAttribute( "href", "/show_bug.cgi?id=" + bugid ); + anchor.textContent = "Bug " + bugid; } - var anchor = document.createElement( "a" ); - anchor.setAttribute( "href", "/show_bug.cgi?id=" + bugnumber ); - anchor.textContent = "Bug " + bugnumber; newnote.replaceChild( anchor, newnote.getElementsByTagName( "span" )[0] ); - newnote.getElementsByTagName( "input" )[0].setAttribute( "name", "note" + bugnumber ); - newnote.getElementsByTagName( "input" )[1].setAttribute( "name", "tags" + bugnumber ); + newnote.getElementsByTagName( "input" )[0].setAttribute( "name", "note" + bugid ); + newnote.getElementsByTagName( "input" )[1].setAttribute( "name", "tags" + bugid ); newnote.className = "note"; } return true; } - function noteify( linkElement, bugnumber ) { + function noteify( linkElement, bugid ) { var notes = document.getElementsByClassName( "note" ); // see if we can find a note already for this bug and just give it focus - var search = "Bug " + bugnumber; + var search = bugid.includes("#") ? 
bugid : "Bug " + bugid; for (var i = 0; i < notes.length; i++) { if (notes[i].firstChild.textContent == search) { notes[i].getElementsByTagName( "input" )[0].focus(); @@ -451,13 +579,13 @@ function noteify( linkElement, bugnumber ) { // also search through the newly-added notes that are in a different format notes = document.getElementsByClassName( "newnote" ); for (var i = 0; i < notes.length; i++) { - if (notes[i].getElementsByTagName( "input" )[0].value == bugnumber) { + if (notes[i].getElementsByTagName( "input" )[0].value == bugid) { notes[i].getElementsByTagName( "input" )[1].focus(); return false; } } // couldn't find it, so add a new one - addNote( bugnumber ); + addNote( bugid ); linkElement.textContent = 'U'; return false; } @@ -466,7 +594,7 @@ function noteify( linkElement, bugnumber ) { ', "\n"; if (count( $buglist ) > 0) { ksort( $buglist, SORT_NUMERIC ); @@ -482,19 +610,17 @@ function noteify( linkElement, bugnumber ) {
Bug notes Bug %d: ' - . '' - . ' ' +foreach ($bugsWithNotes AS $bugid) { + echo sprintf( '
%s: ' + . '' + . ' ' . '%s
', - $_BASE_URL, - $bug, - $bug, - $bug, - escapeHTML( $meta_notes[ $bug ] ), - $bug, - escapeHTML( $meta_tags[ $bug ] ), - escapeHTML( $meta_titles[ $bug ] ) ), + makeBugLink( $bugid ), + $bugid, + escapeHTML( safeGet( $meta_notes, $bugid ) ), + $bugid, + escapeHTML( safeGet( $meta_tags, $bugid ) ), + escapeHTML( safeGet( $meta_titles, $bugid ) ) ), "\n"; } ?> diff --git a/www/index.html b/www/index.html new file mode 100644 index 0000000..2be5171 --- /dev/null +++ b/www/index.html @@ -0,0 +1 @@ +Bugmash! diff --git a/search.php b/www/search.php similarity index 94% rename from search.php rename to www/search.php index c8eb9e4..3c26b6d 100644 --- a/search.php +++ b/www/search.php @@ -1,6 +1,7 @@ array( 'field', 'oldval', 'newval' ), 'comments' => array( 'author', 'comment' ), 'newbugs' => array( 'title', 'author', 'description' ), - 'metadata' => array( 'title' ) + 'metadata' => array( 'title' ), + 'gh_issues' => array( 'author', 'comment' ), ); function lengthSort( $a, $b ) { @@ -184,12 +186,10 @@ function formatHits( $text, $terms, $isTitle ) { } else { $timestamps[ $matchRow['bug'] ] = max( $timestamps[ $matchRow['bug'] ], $timestamp ); } - if (! $metaHit) { - foreach ($_SEARCH_COLUMNS[ $matchRow['table'] ] AS $column) { - $hit = formatHits( $matchRow[ $column ], $terms, false ); - if ($hit) { - $results[ $matchRow['bug'] ][] = $hit; - } + foreach ($_SEARCH_COLUMNS[ $matchRow['table'] ] AS $column) { + $hit = formatHits( $matchRow[ $column ], $terms, false ); + if ($hit) { + $results[ $matchRow['bug'] ][] = $hit; } } } @@ -239,10 +239,8 @@ function formatHits( $text, $terms, $isTitle ) { if (! $formattedTitle) { $formattedTitle = escapeHTML( $meta_titles[ $bug ] ); } - echo sprintf( ' ', - $_BASE_URL, - $bug, - $bug, + echo sprintf( '
%s %s
', + makeBugLink( $bug ), $formattedTitle ), "\n"; foreach ($hits AS $hit) { echo '
', $hit, '
', "\n"; diff --git a/tags.php b/www/tags.php similarity index 95% rename from tags.php rename to www/tags.php index 6094df8..1a9029e 100644 --- a/tags.php +++ b/www/tags.php @@ -2,7 +2,7 @@ include_once( 'common.php' ); -if (! (isset( $_POST['user'] ) && $_POST['user'] == $_ME)) { +if (! (isset( $_POST['user'] ) && in_array( $_POST['user'], $_ME ))) { fail( 'Incorrect user: ' . $_POST['user'] ); } diff --git a/wipe.php b/www/wipe.php similarity index 83% rename from wipe.php rename to www/wipe.php index e599b43..9e4d5ad 100644 --- a/wipe.php +++ b/www/wipe.php @@ -13,7 +13,7 @@ $ids = explode( ',', $_POST['ids'] ); foreach ($ids AS $id) { - switch ($id{0}) { + switch ($id[0]) { case 'r': $table = 'reviews'; break; @@ -29,6 +29,12 @@ case 'c': $table = 'comments'; break; + case 'g': + $table = 'gh_issues'; + break; + case 'p': + $table = 'phab_diffs'; + break; } $rowId = intval( substr( $id, 1 ) ); $_DB->query( "UPDATE {$table} SET viewed=1 WHERE id={$rowId}" );