From df9418e5dcedb7e2009452d2aa85e059d54bc313 Mon Sep 17 00:00:00 2001
From: andr3w321
Date: Fri, 5 Jan 2018 01:49:18 -0800
Subject: [PATCH] updates for python3

---
 data_fetcher.py | 51 +++++++++++++++++++++++++------------------------
 main.py         |  6 +++---
 model_rnn.py    | 16 ++++++++--------
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/data_fetcher.py b/data_fetcher.py
index b311830..5d11627 100644
--- a/data_fetcher.py
+++ b/data_fetcher.py
@@ -7,9 +7,9 @@ import pandas as pd
 import random
 import time
-import urllib2
+import requests
+from requests.utils import requote_uri
 
-from BeautifulSoup import BeautifulSoup
 from datetime import datetime
 
 DATA_DIR = "data"
@@ -25,19 +25,19 @@ def _download_sp500_list():
     if os.path.exists(SP500_LIST_PATH):
         return
 
-    f = urllib2.urlopen(SP500_LIST_URL)
-    print "Downloading ...", SP500_LIST_URL
-    with open(SP500_LIST_PATH, 'w') as fin:
-        print >> fin, f.read()
+    r = requests.get(SP500_LIST_URL)
+    print("Downloading ...", SP500_LIST_URL)
+    with open(SP500_LIST_PATH, 'wb') as fin:
+        fin.write(r.content)
     return
 
 
 def _load_symbols():
     _download_sp500_list()
     df_sp500 = pd.read_csv(SP500_LIST_PATH)
-    df_sp500.sort('Market Cap', ascending=False, inplace=True)
+    df_sp500.sort_values('Market Cap', ascending=False, inplace=True)
     stock_symbols = df_sp500['Symbol'].unique().tolist()
-    print "Loaded %d stock symbols" % len(stock_symbols)
+    print("Loaded %d stock symbols" % len(stock_symbols))
     return stock_symbols
@@ -53,33 +53,34 @@ def fetch_prices(symbol, out_name):
     # Format today's date to match Google's finance history api.
     now_datetime = datetime.now().strftime("%b+%d,+%Y")
 
-    BASE_URL = "https://www.google.com/finance/historical?output=csv&q={0}&startdate=Jan+1%2C+1980&enddate={1}"
-    symbol_url = BASE_URL.format(
-        urllib2.quote(symbol),
-        urllib2.quote(now_datetime, '+')
-    )
-    print "Fetching {} ...".format(symbol)
-    print symbol_url
+    BASE_URL = "https://finance.google.com/finance/historical?output=csv&q={0}&startdate=Jan+1%2C+1980&enddate={1}"
+    symbol_url = requote_uri(BASE_URL.format(
+        symbol,
+        now_datetime
+    ))
+    print("Fetching {} ...".format(symbol))
+    print(symbol_url)
 
     try:
-        f = urllib2.urlopen(symbol_url)
-        with open(out_name, 'w') as fin:
-            print >> fin, f.read()
-    except urllib2.HTTPError:
-        print "Failed when fetching {}".format(symbol)
+        r = requests.get(symbol_url)
+        r.raise_for_status()
+        with open(out_name, 'wb') as fin:
+            fin.write(r.content)
+    except requests.exceptions.HTTPError as err:
+        print("Failed {} when fetching {}".format(err, symbol))
         return False
 
     data = pd.read_csv(out_name)
     if data.empty:
-        print "Remove {} because the data set is empty.".format(out_name)
+        print("Remove {} because the data set is empty.".format(out_name))
         os.remove(out_name)
     else:
         dates = data.iloc[:,0].tolist()
-        print "# Fetched rows: %d [%s to %s]" % (data.shape[0], dates[-1], dates[0])
+        print("# Fetched rows: %d [%s to %s]" % (data.shape[0], dates[-1], dates[0]))
 
     # Take a rest
     sleep_time = random.randint(*RANDOM_SLEEP_TIMES)
-    print "Sleeping ... %ds" % sleep_time
+    print("Sleeping ... %ds" % sleep_time)
     time.sleep(sleep_time)
     return True
@@ -97,14 +98,14 @@ def main(continued):
     for idx, sym in enumerate(symbols):
         out_name = os.path.join(DATA_DIR, sym + ".csv")
         if continued and os.path.exists(out_name):
-            print "Fetched", sym
+            print("Fetched", sym)
             continue
 
         succeeded = fetch_prices(sym, out_name)
         num_failure += int(not succeeded)
 
         if idx % 10 == 0:
-            print "# Failures so far [%d/%d]: %d" % (idx + 1, len(symbols), num_failure)
+            print("# Failures so far [%d/%d]: %d" % (idx + 1, len(symbols), num_failure))
 
 
 if __name__ == "__main__":
diff --git a/main.py b/main.py
index e1df66d..9e26832 100644
--- a/main.py
+++ b/main.py
@@ -52,15 +52,15 @@ def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.0
     info = pd.read_csv("data/constituents-financials.csv")
     info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
     info['file_exists'] = info['symbol'].map(lambda x: os.path.exists("data/{}.csv".format(x)))
-    print info['file_exists'].value_counts().to_dict()
+    print(info['file_exists'].value_counts().to_dict())
     info = info[info['file_exists'] == True].reset_index(drop=True)
-    info = info.sort('market_cap', ascending=False).reset_index(drop=True)
+    info = info.sort_values('market_cap', ascending=False).reset_index(drop=True)
 
     if k is not None:
         info = info.head(k)
 
-    print "Head of S&P 500 info:\n", info.head()
+    print("Head of S&P 500 info:\n", info.head())
 
     # Generate embedding meta file
     info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)
diff --git a/model_rnn.py b/model_rnn.py
index 2c6217c..53e08c7 100644
--- a/model_rnn.py
+++ b/model_rnn.py
@@ -171,9 +171,9 @@ def train(self, dataset_list, config):
         merged_test_y = np.array(merged_test_y)
         merged_test_labels = np.array(merged_test_labels)
 
-        print "len(merged_test_X) =", len(merged_test_X)
-        print "len(merged_test_y) =", len(merged_test_y)
-        print "len(merged_test_labels) =", len(merged_test_labels)
+        print("len(merged_test_X) =", len(merged_test_X))
+        print("len(merged_test_y) =", len(merged_test_y))
+        print("len(merged_test_labels) =", len(merged_test_labels))
 
         test_data_feed = {
             self.learning_rate: 0.0,
@@ -196,10 +196,10 @@ def train(self, dataset_list, config):
                     i for i, sym_label in enumerate(merged_test_labels)
                     if sym_label[0] == l])
                 sample_indices[sym] = target_indices
-        print sample_indices
+        print(sample_indices)
 
-        print "Start training for stocks:", [d.stock_sym for d in dataset_list]
-        for epoch in xrange(config.max_epoch):
+        print("Start training for stocks:", [d.stock_sym for d in dataset_list])
+        for epoch in range(config.max_epoch):
             epoch_step = 0
             learning_rate = config.init_learning_rate * (
                 config.learning_rate_decay ** max(float(epoch + 1 - config.init_epoch), 0.0)
@@ -223,8 +223,8 @@ def train(self, dataset_list, config):
                 if np.mod(global_step, len(dataset_list) * 100 / config.input_size) == 1:
                     test_loss, test_pred = self.sess.run([self.loss, self.pred], test_data_feed)
 
-                    print "Step:%d [Epoch:%d] [Learning rate: %.6f] train_loss:%.6f test_loss:%.6f" % (
-                        global_step, epoch, learning_rate, train_loss, test_loss)
+                    print("Step:%d [Epoch:%d] [Learning rate: %.6f] train_loss:%.6f test_loss:%.6f" % (
+                        global_step, epoch, learning_rate, train_loss, test_loss))
 
                 # Plot samples
                 for sample_sym, indices in sample_indices.iteritems():