forked from minrk/findspark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
findspark.py
60 lines (44 loc) · 1.69 KB
/
findspark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Find spark home, and initialize by adding pyspark to sys.path.
If SPARK_HOME is defined, it will be used to put pyspark on sys.path.
Otherwise, common locations for spark (currently only Homebrew's default) will be searched.
"""
from glob import glob
import os
import sys
__version__ = '0.0.3'
def find():
"""Find a local spark installation.
Will first check the SPARK_HOME env variable, and otherwise
search common installation locations, e.g. from homebrew
"""
spark_home = os.environ.get('SPARK_HOME', None)
if not spark_home:
for path in [
'/usr/local/opt/apache-spark/libexec', # OS X Homebrew
# Any other common places to look?
]:
if os.path.exists(path):
spark_home = path
break
if not spark_home:
raise ValueError("Couldn't find Spark, make sure SPARK_HOME env is set"
" or Spark is in an expected location (e.g. from homebrew installation).")
return spark_home
def init(spark_home=None):
"""Make pyspark importable.
Sets environmental variables and adds dependencies to sys.path.
If no Spark location is provided, will try to find an installation.
Parameters
----------
spark_home : str, optional, default = None
Path to Spark installation, will try to find automatically
if not provided
"""
if not spark_home:
spark_home = find()
# ensure SPARK_HOME is defined
os.environ['SPARK_HOME'] = spark_home
# add pyspark to sys.path
spark_python = os.path.join(spark_home, 'python')
py4j = glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
sys.path[:0] = [spark_python, py4j]