Skip to content

Commit

Permalink
fixed for readmes and robust04
Browse files Browse the repository at this point in the history
  • Loading branch information
cmacdonald committed Jun 11, 2019
1 parent 3a27e48 commit 949619b
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion index
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,14 @@ for collection in args.json["collections"]:
{0}""".format(path).split(), env=my_env)

# Filter README entries out of collection.spec (case-insensitive match)
subprocess.run(["/bin/sh", "-c", "egrep -vi (readme|dtd) /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)
subprocess.run(["/bin/sh", "-c", "egrep -vi 'readme' /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)

# For robust04 only: also filter out the congressional record (cr93) and any entries matching 'read' or 'dtd'
if name == "robust04":
subprocess.run(["/bin/sh", "-c", "egrep -vi 'cr93|read|dtd' /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)

print("Files to index...")
subprocess.run(["wc", "-l", "/work/terrier-core/etc/collection.spec"])

cmd="""
/work/terrier-core/bin/terrier batchindexing -p
Expand Down

0 comments on commit 949619b

Please sign in to comment.