import java.net.URL;
import com.sun.syndication.io.XmlReader;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;
import java.util.*;
import java.io.*;
import org.jdom.filter.*;
public class FeedParserByProduct
{
public static void main(String[] args) throws Exception
{
File f = new File("c:\\temp\\testdata"+System.currentTimeMillis()+".txt");
f.createNewFile();
FileWriter fw=new FileWriter(f);
//ListlRev=getReviewData("B0013O98SW","Windows-OS-Operating System");
//ListlRev=getReviewData("B00022PTT8","Windows-OS-Operating System");
//ListlRev=getReviewData("B000KRG6P6","Wolf-Shirt");
//ListlRev=getReviewData("B00193QFFG","Monitor-3star");
ListlRev=getReviewData(args[0],args[1]);
appendReviewersDataToReviewData(lRev);
fw.write(lRev.get(0).getRowHeaders()+"\r\n");
for(int i=0;i{
fw.write(lRev.get(i).getRawDataRowFormat()+"\r\n");
}
System.out.println("Composite RealScore1 is:"+getMeanRealScore1Composite(lRev));
//System.out.println("done");
fw.flush();
}
public static double getMeanRealScore1Composite(Listrevs) throws Exception
{
double d=0;
for(int i=0;i{
d+=revs.get(i).getRealScore1();
}
return d/revs.size();
}
static void appendReviewersDataToReviewData(ListrevList)throws Exception
{
for (int i=0;i{
Review r = revList.get(i);
Document d =getReviewerData(r.ReviewerID);
appendReviewerToReview(d,r);
}
}
private static void appendReviewerToReview(Document d, Review r)
{
int runningStarTotal=0;
int numberOfFives=0;
int keyWordAppears=0;
int totalHelpfulVotes=0;
int totalVotes=0;
int holder=0;
Element e = d.getRootElement();
ElementFilter ef = new ElementFilter( "Customers", null );
List el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "Customer", null );
el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "CustomerReviews", null );
el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "TotalReviews", null );
el = e.getContent( ef );
Element esub=null;
esub=(Element)el.get(0);
String sx =esub.getText();
holder=Integer.parseInt(sx);
r.reviewer_TotalReviews=holder;
ef = new ElementFilter( "Review", null );
el=e.getContent( ef );
//System.out.println(el.size());
for(int i=0;i{
esub=(Element)el.get(i);
//get the rating
ef=new ElementFilter( "Rating", null );
el=esub.getContent(ef);
holder=Integer.parseInt(((Element)el.get(0)).getText());
runningStarTotal=runningStarTotal+holder;
if(holder==5){numberOfFives +=1;}
//get HelpfulVotes
ef=new ElementFilter( "HelpfulVotes", null );
el=esub.getContent(ef);
holder=Integer.parseInt(((Element)el.get(0)).getText());
totalHelpfulVotes=runningStarTotal+holder;
//Get Total Votes
ef=new ElementFilter( "TotalVotes", null );
el=esub.getContent(ef);
holder=Integer.parseInt(((Element)el.get(0)).getText());
totalVotes=runningStarTotal+holder;
//Check to see if any key words appear in review
//deprecated as it seems that multiple reviews on the same subject can be good or bad
}
r.reviewer_TotalReviewPoints=runningStarTotal;
r.reviewer_TotalFives=numberOfFives;
//keyWordAppears=0;
r.reviewer_HelpfulVotes= totalHelpfulVotes;
r.reviewer_TotalVotes=totalVotes;
}
/**
*
* @param Reviews- Modefies review data list to enrich the profile
*/
static Document getReviewerData(String ReviewerID) throws Exception
{
Document doc=getXMLDoc("http://ecs.amazonaws.com/onca/xml?" +
"Service=AWSECommerceService&" +
"AWSAccessKeyId=AKIAJKIZAU3BXLEUC4DA&" +
"AssociateTag=dvshchyokin@hotmail.com&" +
"Operation=CustomerContentLookup&" +
"CustomerId="+ReviewerID+
"&ResponseGroup=CustomerReviews");
return doc;
}
private static Document getXMLDoc(String s) throws Exception
{
//System.out.println(s);
LinkedListret = new LinkedList ();
//String s ="http://ecs.amazonaws.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=AKIAJKIZAU3BXLEUC4DA&Operation=ItemLookup&ItemId=B0013O98SW&ResponseGroup=Reviews&Version=2008-08-19";
URL feedUrl = new URL(s);
XmlReader xmlr=new XmlReader(feedUrl);
Document doc = null;
SAXBuilder sb = new SAXBuilder();
doc = sb.build(feedUrl);
return doc;
}
static ListgetReviewData(String productID, String pKeyWords) throws Exception
{
LinkedListret= new LinkedList();
for(int j=1;j<50;j++)
{
try{
Document doc=getXMLDoc("http://ecs.amazonaws.com/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=[USE YOUR OWN ACCESS KEY]&Operation=ItemLookup&ItemId="+ productID +"&ResponseGroup=Reviews&Version=2008-08-19&ReviewPage="+Integer.toString(j));
Element e =doc.getRootElement();
ElementFilter ef = new ElementFilter( "Items", null );
List el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "Item", null );
el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "CustomerReviews", null );
el = e.getContent( ef );
e=(Element)el.get(0);
ef = new ElementFilter( "Review", null );
el = e.getContent( ef );
String temp;
for(int i=0;i{
temp=concatReviewData((Element)el.get(i));
ret.add(new Review(temp,pKeyWords));
}
}
catch(Exception ex){break;}
}
return ret;
}
static String concatReviewData(Element e)
{
String temp="";
List el =e.getContent();
for(int i=0;i{
temp=temp+((Element)el.get(i)).getText()+"|";
}
ElementFilter ef = new ElementFilter( "Reviewer", null );
el = e.getContent( ef );
e=(Element)el.get(0);
el=e.getContent();
for(int j=0;j{
temp=temp+((Element)el.get(j)).getText()+"|";
}
//System.out.println(temp);
return temp;
}
}
Thursday, July 9, 2009
A simple example of using JDOM as a client for the Amazon Advertising API
Tuesday, June 30, 2009
Working Outline of the Paper
Outline for: Credibility on the Internet- The war against guerrilla marketing
Thesis - Recommendations on the Internet can no longer be trusted at face value. The solution is to find ways to score the reliability of a review using a model similar to credit scoring: information gleaned from other parts of the Internet about the poster of the review is used to rate how credible that review is.
High Level Outline:
What is credibility
Where and how to obtain credibility data
Some proposed credibility models
Some informal (read fixed) test of the proposed credibility models
What is needed to improve online credibility
(Appendix A) Who/What is the “enemy” of credibility
What is credibility
Defining the Output 3 contributing factors to credibility
A review's credibility can be rated in 4 different dimensions
Objectivity - Does the review come from an actual user, or does it come from a bot or a guerrilla marketer?
Suitability- Is the reviewer qualified to give advice on the product or service he is reviewing.
For example: if the quality of a given company's diamonds is reviewed by someone who doesn't know jewelry ... the review is not credible, even if the reviewer was “objective”
Timeliness- How timely is the information in the review
For example, does the review cover the current version of a piece of software or the last version.
How much does a product change over time
Varies from product category to product category
Where and how to obtain credibility data
To catch a marketer (patterns of guerrilla marketers)
Social Networks (groups and individuals)
Using opensocial to check social networks
Forums
Groups
Facebook
Yahoo
Google
Amazon
E-Commerce API
Search Engines
Google can search for hits by type of content (i.e., given screen name x, find where it appears on review sites, forums, web pages...), e.g. http://www.google.com/#q=%20ivan_rous&hl=en&sa=G&tbo=1&tbs=rvw:1,qdr:w&fp=2kCy_h2xdw8
Links with that specific ID in them (a lot is good... too many is bad)
- Text patterns surrounding the link, i.e., are there forum posts with repeated text? (Twice isn't bad; many, many times is.)
Tuesday, May 26, 2009
So we have 4 criteria, now for some approaches to each...
a)Interesting article by Chris Messina, one of the topics it covers is how to establish identity (a component of credibility), one interesting approach is that:
- Suppose you are trying to decide if PersonX is really PersonX
- Compare your social graph with his
- if you know some of the same people that he knows, in a certain domain then PersonX is likely to be who he says he is.
- If I say I am PersonX on twitter, and I am personX on facebook and I am personX on Google, and my profiles on each service match, then I am likely PersonX
- For additional security, you can compare social graphs!
Scrap Outline 1, Here's a better topic (IMHO)
Research would be in the area of: The viability of a better recommendation engine by using the opensocial API. specifically... recommendations should have at least these dimensions:
- Credibility: Is the reviewer who he says he is? Is that 5-star review for the new camera from a real, satisfied customer or from the guerrilla marketing department at Sony?
- Suitability: Can't think of a better one-word description... but do the reviewer's taste and skill level match yours? I.e., the kind of football helmet Peyton Manning uses may not be the best football helmet for your Pop Warner-aged son, especially if your Pop Warner-aged son is a defensive lineman instead of a quarterback. Or, if a South Indian says the food at a restaurant was bland, that may mean it is too spicy for you!
- Timeliness: Just because the 1985 model of the car was great does not mean the 2009 model is great.
- Reliability: Most products are reviewed as new. How well do they last? How do their performance and effectiveness change over time? This need not be a physical breakdown: suppose your computer has a brand-new 133 MHz motherboard; it will work... but...
Sunday, May 24, 2009
Topic Update
Reddit did not work, not a single response to my post. Oh well, I will try again.
But the next attempt will be on a different topic as I have been assigned a topic to write about.
My assignment is...Open Social.
I guess I will start by making an outline (I'll update this post as it happens)
1. What is Open Social
- Intro
- Containers
- Client Side API (js)
- Server Side API (java)- Open Rhino?
- How it is currently being used
- Limitations
Saturday, May 23, 2009
Using Reddit to Help Me Decide on a Topic
So I am taking a class on Business and Social media, I can have the professor assign me a topic or I can suggest my own. So, since social media is a dialog, I thought I would elicit some feedback. So, dear redditors if you were taking such a class, what would you write your paper on? Some broad areas I was considering:
- User tools for social services
- White hat social media marketing
- Black hat social media marketing
- Monetization strategies for social media
- The long tail of social networking
Peerless
- Focus of feedback
- Timeliness of feedback
- Anonymity
- A Wider Net
I think the reason for this is that most students aren't very good writers, and it is just too much effort to get people to read an entire paper (even if it's only 5 pages). One of the nice things about blogs, as a form of writing, is that they break things up into nice, bite-sized chunks for people to digest slowly, perfect for review as you go. In my writing, I am not trying to be Immanuel Kant, where each sentence is profound but also takes 10 minutes to understand. Instead, a good paper really only has 2 or 3 complex ideas that are difficult to explain, and that is where I need the help. So rather than waste my reviewers' valuable time reading the entire paper, I think it helps to have them review only specific parts of a paper. This helps both them and me focus on the areas that need it.
Time is of the Essence
How many times have you had someone review a paper knowing full well that it is too late for major changes? Putting a paper on a blog as you write it makes sure: a) You actually write it gradually as opposed to the last minute (again bite sized chunks), and b) You find and weed out the truly bad ideas when they come up, rather than 3 hours before the paper is due.
Come on baby make it hurt so good
Both brick-and-mortar and online classes at big universities share at least one basic problem: students are mostly strangers to each other. Everything students have been taught about social etiquette tells them that telling people how much their writing sucks is not a good way to start interacting. Thus, many people will refrain from giving good feedback simply because giving good feedback (criticism) to strangers is impolite. Which leads us to...
No one knows you are a Dog on the internet
People will (anonymously) say things on the internet that they will not say in real life. I believe (and we will test this hypothesis) that this is an advantage of the internet. People love to argue, and if you make provocative posts people will chime in with opinions which will show you the weakness of your argument/ideas (assuming you can weed out the obvious trolls). Also, if your post doesn't make sense, and you have enough people reading your blog, and your readers are sane, it will show you where you should explain your ideas better.
Are you Pondering what I'm Pondering
One of the other problems with classmates reviewing your paper is that they may not be any better or more knowledgeable writers than you. In addition, social media is a wide area and the topic you are covering may be very different from theirs. One of the things I hope to get from publicizing this blog is feedback from people who ARE NOT my classmates, or necessarily even college students, but are instead interested in and learning about/researching the same topic I am writing about.
What's Going on Here
What's in it for me
In addition to studying how social media helps businesses, I thought I would look into how social media helps me write. Though I believe I am a strong writer overall, I think that one of my weaker points is taking notes and keeping sources straight. I tend to scribble indecipherable notes, as I read, on scrap pieces of paper, which I then lose. So when it is time to write, I end up surfing the net or crawling through a book, trying to find my sources from memory. This is a huge time-waster, and I think a blog will help. So...here is my list of resolutions for keeping this blog as the central repository of knowledge for my paper:
- Post parts of the paper as posts when inspiration strikes
- Submit my comments, notes, juxtapositional observations on articles I read as posts
- Include links to read later as posts
- Use this blog to summarize my attempts at using other social media (reddit, 4chan, answers?) to help me write my paper
- What else (please comment)?
Twitter Didn't Satisfy Me
I actually want comments
Twitter seems to be just a bunch of people blathering into space (some with good links btw), so it is good for status updates to people following you, but it is not a good means of two way communications. I think a good paper comes from dialog, and twitter fails to do that.
Twitter doesn't let you search (or edit) what you posted.
This is a real problem since for all I know, my professor for this class is reading a blog with a bunch of spelling errors. Also since I started following two people, my own posts get buried quickly and I can't find what I wrote a few days ago (at least not easily).
I know a lot of this stuff can be done through the Twitter API (possibly a future business idea), but right now I just don't have time to make a decent client for it.
Blogspot lets you post by SMS as well
For some reason, I didn't think that a blog could be posted to by SMS, so I was pleasantly surprised when this option came up on my blogspot signup. I sometimes get ideas in pretty strange places, and I have a 25 dollar a month data plan to justify. So, for me, this is a killer feature.