Amazon/Epinions user review parser

From CSE330 Wiki

Jump to: navigation, search

Contents

[edit] Amazon/Epinions user review parser

The goal is to remove "bad" reviews from the Amazon and Epinions websites whenever a user visits them.

[edit] Design Process

I'm using the Greasemonkey Firefox extension with my own user scripts to edit the page's HTML on the client side.

Bad reviews are those written by low-ranking reviewers and one-line reviews.

[edit] Relevant links

[1] Greasemonkey Blog

[2]Amazon

[3] Epinions

[edit] Current Script

Currently, bad reviews are filtered out based on three criteria: whether the reviewer is on an ignore list, the length of the review, and whether the review contains any word from a list of flagged words. The user is also asked whether to keep or remove each flagged review.

The confirmation prompt can be disabled by editing the script. Additionally, the script can be configured to keep the pros and cons from removed reviews.

The script currently works on epinions.com when you display the reviews for a product.

// ==UserScript==
// @name           Epinions Review Filter
// @namespace      http://diveintogreasemonkey.org/download
// @description    Filters out "bad" reviews in epinions
// @include        http://www.epinions.com/*/display_~reviews
// ==/UserScript==


// ---- User-editable settings ----

// Reviews whose text is not longer than this many characters are flagged as too short.
var charcountmin = 300;

// Space-separated reviewer names to ignore. The string must begin and end
// with a space, and names must be separated by exactly one space.
var ignore_list = " bobcat7677 bob joe ladybrent ";

// A review containing any of these strings is flagged as poorly written.
var bad_word_array = ["lame", "sucks", "thru"];

// 1 = ask before removing each flagged review, 0 = remove without asking.
var ask_confirmation = 1;

// 1 = when a review is removed, strip only its text and leave the rest of
// the row (pros/cons) in place; any other value removes the whole row.
var keep_pros_cons = 1;

// 0 = reviews by ignored users are always removed entirely, even when
// keep_pros_cons is 1.
var keep_pros_cons_from_ignore_list = 0;



// Working state for the review row currently being examined.
var reviews;
var rev;
var block, titleblock, infoblock;
var userblock;
var user;
var text;

// Candidate review rows: every white-background table row on the page.
// A snapshot result is requested so rows can be removed from the document
// while the result set is being iterated.
reviews = document.evaluate("//table//tr[@bgcolor='white']", document, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

for (var x = 0; x < reviews.snapshotLength; x++){

	rev = reviews.snapshotItem(x);

	// Walk the fixed child positions the script relies on to reach the
	// reviewer name and the review text.
	block = rev.childNodes[3];
	titleblock = block && block.childNodes[1];
	userblock = titleblock && titleblock.childNodes[2];
	infoblock = block && block.childNodes[3];

	// A white row that lacks this layout cannot be evaluated as a review;
	// skip it instead of throwing and aborting the filter for the whole page.
	if (!userblock || !userblock.childNodes[1] || !infoblock || !infoblock.childNodes[2])
		continue;

	user = userblock.childNodes[1].textContent;
	text = infoblock.childNodes[2].textContent;

	// Each check returns 0 when the review fails it and 1 when it passes.
	var textbool = checkText(text);
	var userbool = checkBadUser(user);
	var badtextbool = checkBadWord(text);

	// Review passed every check: leave it alone.
	if (textbool != 0 && userbool != 0 && badtextbool != 0)
		continue;

	// When confirmation is enabled, a declined prompt keeps the review untouched.
	if (ask_confirmation == 1 && !confirm(buildRemovalPrompt(user, textbool, badtextbool, userbool)))
		continue;

	removeFlaggedReview(rev, infoblock, userbool);
}

/**
 * Builds the confirmation message shown before a flagged review is removed.
 *
 * @param {string} user - reviewer name shown in the prompt
 * @param {number} textbool - 0 if the review was flagged as too short
 * @param {number} badtextbool - 0 if the review contains a flagged word
 * @param {number} userbool - 0 if the reviewer is on the ignore list
 * @returns {string} the message listing every reason the review was flagged
 */
function buildRemovalPrompt(user, textbool, badtextbool, userbool){
	var err_str = "Do you want to delete  the review by: " + user;
	err_str += "\n\n\nThis review is up for consideration because: ";

	if (textbool == 0)
		err_str += "\n-The review was too short and is unlikely to" +
			   "\n  provide constructive comments.";

	if (badtextbool == 0)
		err_str += "\n-The review contains a word that indicates" +
			   "\n  that it is poorly written and may be" +
			   "\n  unconstructive.";

	if (userbool == 0)
		err_str += "\n-" + user + " is on your ignore list.";

	return err_str;
}

/**
 * Removes a flagged review from the page according to the keep_pros_cons
 * settings: either only the review text node is removed (leaving the rest of
 * the row in place) or the whole row is removed.
 *
 * @param {Node} rev - the table row holding the review
 * @param {Node} infoblock - the node whose third child is the review text
 * @param {number} userbool - 0 if the reviewer is on the ignore list
 */
function removeFlaggedReview(rev, infoblock, userbool){
	// Ignored users lose the whole row unless keep_pros_cons_from_ignore_list is set.
	var dropIgnoredUserEntirely = (userbool == 0 && keep_pros_cons_from_ignore_list == 0);

	if (keep_pros_cons == 1 && !dropIgnoredUserEntirely){
		// keep pro con: strip only the review text
		infoblock.removeChild(infoblock.childNodes[2]);
	}else{
		rev.parentNode.removeChild(rev);
	}
}


/**
 * Decides whether a review's text is substantial enough to keep.
 *
 * A review passes when the three characters immediately before its last
 * character are all '.', which is treated as a sign that the review
 * continues onto another page (this can be tricked, but that is not expected
 * to matter much), or when it is longer than charcountmin characters.
 *
 * @param {string} text - the review text
 * @returns {number} 1 if the review passes, 0 if it is flagged
 */
function checkText(text){
	var size = text.length;

	// Characters at positions size-4 .. size-2; shorter texts yield fewer
	// than three characters and therefore never match.
	var continuesElsewhere = text.slice(-4, -1) === "...";

	return (continuesElsewhere || size > charcountmin) ? 1 : 0;
}

/**
 * Checks whether a reviewer is on the ignore list.
 *
 * The name is looked up surrounded by single spaces, so it only matches a
 * complete entry of the space-delimited ignore_list string.
 *
 * @param {string} name - the reviewer's user name
 * @returns {number} 0 if the user is on the ignore list, 1 otherwise
 */
function checkBadUser(name){
	// indexOf matches the name literally. String.prototype.search would turn
	// it into a regular expression, so names containing characters such as
	// '.', '+' or '(' could match the wrong entry or throw a SyntaxError.
	if (ignore_list.indexOf(' ' + name + ' ') != -1)
		return 0;

	return 1;
}

/**
 * Checks a review's text against the configured list of flagged words.
 *
 * Matching is a case-sensitive substring search: an entry is found wherever
 * it appears in the text, including inside a longer word.
 *
 * @param {string} text - the review text
 * @returns {number} 0 if the text contains any entry of bad_word_array, 1 otherwise
 */
function checkBadWord(text){
	// Index loop rather than for...in, which would also visit any enumerable
	// properties added to the array or to Array.prototype.
	for (var i = 0; i < bad_word_array.length; i++){
		// indexOf matches the entry literally; String.prototype.search would
		// interpret it as a regular expression.
		if (text.indexOf(bad_word_array[i]) != -1)
			return 0;
	}

	return 1;
}


Personal tools