使用 PySpark 读取 CSV 文件中的数据。
该 CSV 文件带有表头(header),表头包含两列,列名分别为 bd 和 tt。
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, IntegerType, StringType


def run():
    """Load ``map.csv`` with Spark and store its bd -> tt mapping in ``result``.

    The CSV is read with an explicit two-column string schema (``bd``, ``tt``)
    and its first line is treated as a header. All rows are collected, each
    ``tt`` value is split on ``";"`` and the resulting list is stored in
    ``result`` under the row's ``bd`` value. ``analysis()`` is called once
    every row has been stored.

    A null ``tt`` value is stored as an empty list instead of raising
    ``AttributeError``.

    NOTE(review): ``result`` and ``analysis`` are not defined in the visible
    part of this file; presumably they are a module-level dict and a function
    defined elsewhere -- confirm.
    """
    # Parenthesised chain instead of backslash continuations, which break as
    # soon as a blank line or comment follows a trailing backslash.
    spark = (
        SparkSession.builder
        .appName("read_csv")
        .getOrCreate()
    )

    # Define the schema explicitly: both columns are nullable strings.
    schema = StructType([
        StructField('bd', StringType(), True),
        StructField('tt', StringType(), True),
    ])

    # header=True: the first line of the file holds column names, not data.
    # The schema is passed explicitly, so inferSchema is not used here.
    df = spark.read.csv("map.csv", schema=schema, encoding='utf-8', header=True)
    df = df.select("bd", "tt")

    for row in df.collect():
        tt = row['tt']
        # The schema declares ``tt`` nullable, so guard against None before
        # splitting; a missing value maps to an empty list.
        result[row['bd']] = tt.split(";") if tt is not None else []

    analysis()


if __name__ == '__main__':
    run()