updates for python3 #11

Open
wants to merge 1 commit into base: master
51 changes: 26 additions & 25 deletions data_fetcher.py
@@ -7,9 +7,9 @@
 import pandas as pd
 import random
 import time
-import urllib2
+import requests
+from requests.utils import requote_uri
 
-from BeautifulSoup import BeautifulSoup
 from datetime import datetime
 
 DATA_DIR = "data"
@@ -25,19 +25,19 @@ def _download_sp500_list():
     if os.path.exists(SP500_LIST_PATH):
         return
 
-    f = urllib2.urlopen(SP500_LIST_URL)
-    print "Downloading ...", SP500_LIST_URL
-    with open(SP500_LIST_PATH, 'w') as fin:
-        print >> fin, f.read()
+    r = requests.get(SP500_LIST_URL)
+    print("Downloading ...", SP500_LIST_URL)
+    with open(SP500_LIST_PATH, 'wb') as fin:
+        fin.write(r.content)
     return
 
 
 def _load_symbols():
     _download_sp500_list()
     df_sp500 = pd.read_csv(SP500_LIST_PATH)
-    df_sp500.sort('Market Cap', ascending=False, inplace=True)
+    df_sp500.sort_values('Market Cap', ascending=False, inplace=True)
     stock_symbols = df_sp500['Symbol'].unique().tolist()
-    print "Loaded %d stock symbols" % len(stock_symbols)
+    print("Loaded %d stock symbols" % len(stock_symbols))
     return stock_symbols
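The DataFrame.sort change is as necessary as the print fixes: sort was deprecated in pandas 0.17 and removed in 0.20, so the old call raises AttributeError on any modern pandas. A quick sketch of the replacement (column values are made up):

    import pandas as pd

    df = pd.DataFrame({'Symbol': ['AAPL', 'XOM'], 'Market Cap': [800e9, 350e9]})
    # Old (pandas < 0.20): df.sort('Market Cap', ascending=False, inplace=True)
    df.sort_values('Market Cap', ascending=False, inplace=True)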


@@ -53,33 +53,34 @@ def fetch_prices(symbol, out_name):
     # Format today's date to match Google's finance history api.
     now_datetime = datetime.now().strftime("%b+%d,+%Y")
 
-    BASE_URL = "https://www.google.com/finance/historical?output=csv&q={0}&startdate=Jan+1%2C+1980&enddate={1}"
-    symbol_url = BASE_URL.format(
-        urllib2.quote(symbol),
-        urllib2.quote(now_datetime, '+')
-    )
-    print "Fetching {} ...".format(symbol)
-    print symbol_url
+    BASE_URL = "https://finance.google.com/finance/historical?output=csv&q={0}&startdate=Jan+1%2C+1980&enddate={1}"
+    symbol_url = requote_uri(BASE_URL.format(
+        symbol,
+        now_datetime
+    ))
+    print("Fetching {} ...".format(symbol))
+    print(symbol_url)
 
     try:
-        f = urllib2.urlopen(symbol_url)
-        with open(out_name, 'w') as fin:
-            print >> fin, f.read()
-    except urllib2.HTTPError:
-        print "Failed when fetching {}".format(symbol)
+        r = requests.get(symbol_url)
+        r.raise_for_status()
+        with open(out_name, 'wb') as fin:
+            fin.write(r.content)
+    except requests.exceptions.HTTPError as err:
+        print("Failed {} when fetching {}".format(err, symbol))
         return False
 
     data = pd.read_csv(out_name)
     if data.empty:
-        print "Remove {} because the data set is empty.".format(out_name)
+        print("Remove {} because the data set is empty.".format(out_name))
         os.remove(out_name)
     else:
         dates = data.iloc[:,0].tolist()
-        print "# Fetched rows: %d [%s to %s]" % (data.shape[0], dates[-1], dates[0])
+        print("# Fetched rows: %d [%s to %s]" % (data.shape[0], dates[-1], dates[0]))
 
     # Take a rest
     sleep_time = random.randint(*RANDOM_SLEEP_TIMES)
-    print "Sleeping ... %ds" % sleep_time
+    print("Sleeping ... %ds" % sleep_time)
     time.sleep(sleep_time)
     return True
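One behavioral difference hiding in this hunk: urllib2.urlopen raised HTTPError automatically on 4xx/5xx responses, while requests.get does not, so the added r.raise_for_status() call is what keeps the except branch reachable. A small sketch of the new failure path (URL illustrative):

    import requests

    try:
        r = requests.get("https://example.com/missing.csv")
        r.raise_for_status()   # turns a 404/500 status into an exception
    except requests.exceptions.HTTPError as err:
        print("Failed {} when fetching".format(err))

One caveat: network-level failures raise requests.exceptions.ConnectionError, which this except clause does not catch; catching requests.exceptions.RequestException would cover both.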

@@ -97,14 +98,14 @@ def main(continued):
     for idx, sym in enumerate(symbols):
         out_name = os.path.join(DATA_DIR, sym + ".csv")
         if continued and os.path.exists(out_name):
-            print "Fetched", sym
+            print("Fetched", sym)
             continue
 
         succeeded = fetch_prices(sym, out_name)
         num_failure += int(not succeeded)
 
         if idx % 10 == 0:
-            print "# Failures so far [%d/%d]: %d" % (idx + 1, len(symbols), num_failure)
+            print("# Failures so far [%d/%d]: %d" % (idx + 1, len(symbols), num_failure))
 
 
 if __name__ == "__main__":
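A compatibility note on the print conversions throughout this file: without from __future__ import print_function, a two-argument call like print("Fetched", sym) still parses under Python 2 but prints a tuple, so this change effectively makes the script Python-3-only. If dual support mattered, the fix would be the future import (hypothetical here, since the PR targets Python 3):

    from __future__ import print_function  # hypothetical, for Python 2 compatibility

    print("Fetched", "AAPL")  # Python 3 (and 2 with the import): Fetched AAPL
    # plain Python 2 would print the tuple: ('Fetched', 'AAPL')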
6 changes: 3 additions & 3 deletions main.py
@@ -52,15 +52,15 @@ def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.0
     info = pd.read_csv("data/constituents-financials.csv")
     info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
     info['file_exists'] = info['symbol'].map(lambda x: os.path.exists("data/{}.csv".format(x)))
-    print info['file_exists'].value_counts().to_dict()
+    print(info['file_exists'].value_counts().to_dict())
 
     info = info[info['file_exists'] == True].reset_index(drop=True)
-    info = info.sort('market_cap', ascending=False).reset_index(drop=True)
+    info = info.sort_values('market_cap', ascending=False).reset_index(drop=True)
 
     if k is not None:
         info = info.head(k)
 
-    print "Head of S&P 500 info:\n", info.head()
+    print("Head of S&P 500 info:\n", info.head())
 
     # Generate embedding meta file
     info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)
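A side nit, not part of the Python 3 migration: the boolean mask above does not need the == True comparison. A minimal equivalent (DataFrame contents made up):

    import pandas as pd

    info = pd.DataFrame({'symbol': ['AAPL', 'FAKE'], 'file_exists': [True, False]})
    # Same result as info[info['file_exists'] == True]:
    info = info[info['file_exists']].reset_index(drop=True)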
16 changes: 8 additions & 8 deletions model_rnn.py
@@ -171,9 +171,9 @@ def train(self, dataset_list, config):
         merged_test_y = np.array(merged_test_y)
         merged_test_labels = np.array(merged_test_labels)
 
-        print "len(merged_test_X) =", len(merged_test_X)
-        print "len(merged_test_y) =", len(merged_test_y)
-        print "len(merged_test_labels) =", len(merged_test_labels)
+        print("len(merged_test_X) =", len(merged_test_X))
+        print("len(merged_test_y) =", len(merged_test_y))
+        print("len(merged_test_labels) =", len(merged_test_labels))
 
         test_data_feed = {
             self.learning_rate: 0.0,
@@ -196,10 +196,10 @@
                 i for i, sym_label in enumerate(merged_test_labels)
                 if sym_label[0] == l])
             sample_indices[sym] = target_indices
-        print sample_indices
+        print(sample_indices)
 
-        print "Start training for stocks:", [d.stock_sym for d in dataset_list]
-        for epoch in xrange(config.max_epoch):
+        print("Start training for stocks:", [d.stock_sym for d in dataset_list])
+        for epoch in range(config.max_epoch):
             epoch_step = 0
             learning_rate = config.init_learning_rate * (
                 config.learning_rate_decay ** max(float(epoch + 1 - config.init_epoch), 0.0)
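The xrange rename is a drop-in fix: xrange existed only in Python 2, and Python 3's range is already a lazy sequence object, so behavior here is unchanged:

    # Python 2: xrange(n) was lazy, range(n) built a list.
    # Python 3: range(n) is lazy; wrap in list(...) to materialize.
    for epoch in range(3):
        print("epoch", epoch)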
@@ -223,8 +223,8 @@
             if np.mod(global_step, len(dataset_list) * 100 / config.input_size) == 1:
                 test_loss, test_pred = self.sess.run([self.loss, self.pred], test_data_feed)
 
-                print "Step:%d [Epoch:%d] [Learning rate: %.6f] train_loss:%.6f test_loss:%.6f" % (
-                    global_step, epoch, learning_rate, train_loss, test_loss)
+                print("Step:%d [Epoch:%d] [Learning rate: %.6f] train_loss:%.6f test_loss:%.6f" % (
+                    global_step, epoch, learning_rate, train_loss, test_loss))
 
             # Plot samples
             for sample_sym, indices in sample_indices.iteritems():
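Two Python 2 idioms survive in the context lines of this hunk and would still break or misbehave under Python 3. dict.iteritems() was removed (items() returns a view in Python 3), and / on two ints became true division, so the np.mod(...) == 1 step check now compares against a float when config.input_size is an int. A hedged sketch of the fixes, reusing the names from the diff:

    # 1) dict.iteritems() does not exist in Python 3:
    for sample_sym, indices in sample_indices.items():
        ...  # plot samples

    # 2) int / int is true division in Python 3; use // to keep the old
    #    Python 2 floor-division semantics in the logging-step check:
    #    np.mod(global_step, len(dataset_list) * 100 // config.input_size) == 1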