Amazon/Epinions user review parser
From CSE330 Wiki
Contents |
[edit] Amazon/Epinions user review parser
The goal is to remove "bad" reviews from Amazon and Epinions pages whenever a user visits those websites.
[edit] Design Process
I'm using the Greasemonkey Firefox extension with my own user scripts to edit the page's HTML on the client side.
"Bad" reviews are those written by low-ranking reviewers and one-line reviews.
[edit] Relevant links
[1] Greasemonkey Blog
[2]Amazon
[3] Epinions
[edit] Current Script
Currently, bad reviews are filtered out based on an ignore list of users, the length of the review, and words that the review contains. Users can also choose, review by review, whether to keep or remove each flagged review.
The confirmation prompt can be turned off by editing the script. Additionally, the script can be configured to keep the pros and cons from removed reviews.
Currently works on epinions.com when you display the reviews for a product
// ==UserScript==
// @name Epinions Review Filter
// @namespace http://diveintogreasemonkey.org/download
// @description Filters out "bad" reviews in epinions
// @include http://www.epinions.com/*/display_~reviews
// ==/UserScript==
// ---- User-editable settings ----

// Character threshold used by checkText: a review must be longer than this
// to count as long enough.
var charcountmin = 300;

// Reviewer names to ignore, separated by single spaces. Keep one space at the
// very beginning and one at the very end of the string as well.
var ignore_list = " bobcat7677 bob joe ladybrent ";

// Words whose presence flags a review as poorly written.
var bad_word_array = ["lame", "sucks", "thru"];

// 1 = ask for confirmation before removing a flagged review, 0 = never ask.
var ask_confirmation = 1;

// 1 = when a flagged review is removed, strip only its text and keep the
// pro/con summary; any other value removes the whole review row.
var keep_pros_cons = 1;

// 0 = reviews by ignore-listed users are removed entirely even when
// keep_pros_cons is 1.
var keep_pros_cons_from_ignore_list = 0;
// Working variables describing the review row currently being examined.
var reviews;
var rev;
var block, titleblock, infoblock;
var userblock;
var user, title;
var text, procon;

/**
 * Builds the confirmation prompt shown before a flagged review is removed.
 *
 * @param {string} reviewer - name of the review's author
 * @param {boolean} tooShort - the review failed the length check
 * @param {boolean} hasBadWord - the review contains a flagged word
 * @param {boolean} isIgnoredUser - the author is on the ignore list
 * @returns {string} the message passed to confirm()
 */
function buildReviewPrompt(reviewer, tooShort, hasBadWord, isIgnoredUser){
    var msg = "Do you want to delete the review by: " + reviewer;
    msg += "\n\n\nThis review is up for consideration because: ";
    if (tooShort){
        msg += "\n-The review was too short and is unlikely to" +
            "\n provide constructive comments.";
    }
    if (hasBadWord){
        msg += "\n-The review contains a word that indicates" +
            "\n that it is poorly written and may be" +
            "\n unconstructive.";
    }
    if (isIgnoredUser){
        msg += "\n-" + reviewer + " is on your ignore list.";
    }
    return msg;
}

/**
 * Removes a flagged review from the page, honouring the pro/con settings.
 * The pro/con summary survives only when keep_pros_cons is 1 and the author
 * is not an ignore-listed user excluded by keep_pros_cons_from_ignore_list;
 * in that case just the review text node is dropped. Otherwise the whole row
 * is removed.
 *
 * @param {Node} row - the review's table row
 * @param {Node} info - the node holding the pro/con summary and review text
 * @param {boolean} isIgnoredUser - the author is on the ignore list
 */
function discardReview(row, info, isIgnoredUser){
    // Loose == on the settings keeps hand-edited values such as "1" working.
    var dropWholeRow = keep_pros_cons != 1 ||
        (isIgnoredUser && keep_pros_cons_from_ignore_list == 0);
    if (dropWholeRow){
        row.parentNode.removeChild(row);
    } else {
        info.removeChild(info.childNodes[2]);
    }
}

// Snapshot results stay valid while rows are removed from the document.
reviews = document.evaluate("//table//tr[@bgcolor='white']", document, null,
    XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

for (var x = 0; x < reviews.snapshotLength; x++){
    rev = reviews.snapshotItem(x);

    // NOTE(review): the fixed childNodes indexes presumably mirror the markup
    // of an Epinions review row; a matched row with a different structure
    // will throw here - confirm against the live page.
    block = rev.childNodes[3];
    titleblock = block.childNodes[1];
    title = titleblock.childNodes[0];
    userblock = titleblock.childNodes[2];
    user = userblock.childNodes[1].textContent;
    infoblock = block.childNodes[3];
    procon = infoblock.childNodes[0].textContent;
    text = infoblock.childNodes[2].textContent;

    // Each check returns 0 when the review fails it and 1 when it passes.
    var textbool = checkText(text);
    var userbool = checkBadUser(user);
    var badtextbool = checkBadWord(text);

    if (textbool != 0 && userbool != 0 && badtextbool != 0){
        continue; // nothing wrong with this review
    }

    // Only prompt when confirmation is enabled; otherwise remove directly.
    if (ask_confirmation != 1 ||
            confirm(buildReviewPrompt(user, textbool == 0, badtextbool == 0, userbool == 0))){
        discardReview(rev, infoblock, userbool == 0);
    }
}
/**
 * Decides whether a review's text is substantial enough to keep.
 *
 * A review passes when the three characters just before its final character
 * are all '.', which is taken to mean the review continues on another page
 * (this can be fooled, but that is accepted), or when it is longer than
 * charcountmin characters.
 *
 * @param {string} text - the review text
 * @returns {number} 1 when the review is acceptable, 0 when it is too short
 */
function checkText(text){
    var size = text.length;
    // substring clamps negative indexes to 0, so texts shorter than four
    // characters yield fewer than three characters and can never match.
    var continuesElsewhere = text.substring(size - 4, size - 1) == '...';
    return (continuesElsewhere || size > charcountmin) ? 1 : 0;
}
/**
 * Reports whether a reviewer is on the ignore list.
 *
 * The name is looked up in ignore_list surrounded by single spaces, so only
 * whole entries match ("bob" does not match "bobcat7677").
 *
 * @param {string} name - the reviewer's user name
 * @returns {number} 0 when the user is on the ignore list, 1 otherwise
 */
function checkBadUser(name){
    // indexOf compares literally. String.prototype.search would turn the name
    // into a regular expression, so "b.b" would match "bob" and a name such
    // as "joe(" would throw a SyntaxError.
    if (ignore_list.indexOf(' ' + name + ' ') !== -1){
        return 0;
    }
    return 1;
}
/**
 * Reports whether a review contains any entry of bad_word_array.
 *
 * Matching is a case-sensitive literal substring test.
 *
 * @param {string} text - the review text
 * @returns {number} 0 when a listed word occurs in the text, 1 otherwise
 */
function checkBadWord(text){
    // Index loop instead of for...in: for...in would also visit enumerable
    // properties added to Array.prototype.
    for (var i = 0; i < bad_word_array.length; i++){
        // indexOf matches literally. String.prototype.search would compile
        // the word into a regular expression, so "$5" could never match and
        // "c++" would throw a SyntaxError.
        if (text.indexOf(bad_word_array[i]) !== -1){
            return 0;
        }
    }
    return 1;
}

