如何使用 PyCharm 编写 Spark 程序？

2025-04-14 00:59:35
推荐回答(1个)
回答1:

import os
import sys

# Root of the local Spark distribution; every other path is derived from it
# so that switching Spark versions means editing a single line.
SPARK_HOME = "/Users/dustinchen/Documents/APP/spark-1.6.1-bin-hadoop2.6"

# Name of the py4j source archive shipped under <SPARK_HOME>/python/lib.
PY4J_ARCHIVE = "py4j-0.9-src.zip"


def configure_spark_paths(spark_home=SPARK_HOME, py4j_archive=PY4J_ARCHIVE):
    """Point this interpreter at a local Spark distribution.

    Sets the ``SPARK_HOME`` environment variable and makes sure the
    ``pyspark`` package directory and the py4j archive are on ``sys.path``
    so that ``import pyspark`` can succeed afterwards (e.g. when the script
    is launched from an IDE instead of through ``spark-submit``).

    Args:
        spark_home: Root directory of the Spark distribution.
        py4j_archive: File name of the py4j zip inside ``python/lib``.

    Returns:
        The list of entries that are guaranteed to be on ``sys.path`` after
        the call, in the order ``[pyspark dir, py4j archive]``.
    """
    os.environ['SPARK_HOME'] = spark_home

    # You might need to enter your local IP as well, for example:
    #   os.environ['SPARK_LOCAL_IP'] = "192.168.2.138"

    python_dir = os.path.join(spark_home, "python")
    entries = [python_dir, os.path.join(python_dir, "lib", py4j_archive)]
    for entry in entries:
        # Skip entries that are already present so repeated calls do not
        # keep growing sys.path.
        if entry not in sys.path:
            sys.path.append(entry)
    return entries


def main():
    """Run a minimal Spark job that counts the elements of a small RDD.

    Exits with status 1 if the Spark Python modules cannot be imported.
    """
    configure_spark_paths()

    try:
        # The imports must happen after the paths are configured, which is
        # why they live here rather than at the top of the file. SparkConf
        # is imported only to verify that it is available.
        from pyspark import SparkContext
        from pyspark import SparkConf

        print ("Successfully imported Spark Modules")
    except ImportError as e:
        print ("Can not import Spark Modules", e)
        sys.exit(1)

    sc = SparkContext('local')
    try:
        words = sc.parallelize(["scala", "java", "hadoop", "spark", "akka"])
        print(words.count())
    finally:
        # Always shut the context down so the local JVM gateway is released
        # even if the job raises.
        sc.stop()


if __name__ == "__main__":
    main()