From fff1f961eca5397e1cf12016ef5f561424a9b5c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20S=C5=82omi=C5=84ski?= <simon.slominski@gmail.com>
Date: Fri, 14 Sep 2018 21:48:19 +0200
Subject: [PATCH] Update to Python 3

---
 doAnalysis.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/doAnalysis.py b/doAnalysis.py
index 783253f..1a65c2f 100644
--- a/doAnalysis.py
+++ b/doAnalysis.py
@@ -1,7 +1,15 @@
 """
-Author: Stephen W. Thomas
+I modified the script to work with Python 3.
+Hint: if all libraries are installed but you still get the error
+"Matplotlib Python is not installed as a framework",
+run the following command in the terminal:
 
-Perform sentiment analysis using TextBlob to do the heavy lifting.
+echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc
+
+Requirements:
+textblob
+numpy
+matplotlib
 """
 
 # The main package to help us with our text analysis
@@ -51,9 +59,10 @@ def strip_non_ascii(string):
 # clean:    The preprocessed string of characters
 # TextBlob: The TextBlob object, created from the 'clean' string
 
-with open('newtwitter.csv', 'rb') as csvfile:
+with open('newtwitter.small.csv', encoding = "ISO-8859-1") as csvfile:
     reader = csv.reader(csvfile, delimiter=',')
-    reader.next()
+    next(reader)
+
     for row in reader:
 
         tweet= dict()
@@ -135,25 +144,26 @@ def strip_non_ascii(string):
 
 # First, print out a few example tweets from each sentiment category.
 
-print "\n\nTOP NEGATIVE TWEETS"
+print("\n\nTOP NEGATIVE TWEETS")
 negative_tweets = [d for d in tweets_sorted if d['sentiment'] == 'negative']
 for tweet in negative_tweets[0:100]:
-    print "id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean'])
+    print("id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean']))
 
-print "\n\nTOP POSITIVE TWEETS"
+print("\n\nTOP POSITIVE TWEETS")
 positive_tweets = [d for d in tweets_sorted if d['sentiment'] == 'positive']
 for tweet in positive_tweets[-100:]:
-    print "id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean'])
+    print("id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean']))
 
-print "\n\nTOP NEUTRAL TWEETS"
+print("\n\nTOP NEUTRAL TWEETS")
 neutral_tweets = [d for d in tweets_sorted if d['sentiment'] == 'neutral']
 for tweet in neutral_tweets[0:500]:
-    print "id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean'])
+    print("id=%d, polarity=%.2f, clean=%s" % (tweet['id'], tweet['polarity'], tweet['clean']))
 
 
 # Next, create some plots
 
 # A histogram of the scores.
+
 x = [d['polarity'] for d in tweets_sorted]
 num_bins = 21
 n, bins, patches = plt.hist(x, num_bins, normed=1, facecolor='green', alpha=0.5)