使用 PySpark 读取 CSV 文件中的数据。
CSV 文件带有表头(header),表头中有两列,列名分别为:bd、tt。

from pyspark.sql import SparkSession
# NOTE(review): the wildcard import shadows builtins such as sum/max/min;
# it is kept because other code in this file may rely on it.
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, IntegerType, StringType


def run():
    """Read ``map.csv`` with Spark and record each row in ``result``.

    The CSV has a header row with two columns, ``bd`` and ``tt``. For
    every data row, ``result[bd]`` is set to the ``tt`` value split on
    ``";"``; a missing ``tt`` value is stored as an empty list. Once all
    rows are recorded, ``analysis()`` is called.

    NOTE(review): ``result`` and ``analysis`` are not defined in the
    visible part of this file; they are assumed to be a module-level
    mapping and a module-level function - confirm.
    """
    # Parenthesised chain instead of backslash continuations, which break
    # as soon as a comment or stray whitespace follows the backslash.
    spark = (
        SparkSession.builder
        .appName("read_csv")
        .getOrCreate()
    )

    # Explicit schema: both columns are read as nullable strings.
    schema = StructType([
        StructField('bd', StringType(), True),
        StructField('tt', StringType(), True),
    ])

    # header=True marks the first line of the file as column names.
    # A schema is passed explicitly, so inferSchema is not needed here.
    df = spark.read.csv(r"map.csv", schema=schema, encoding='utf-8', header=True)
    df = df.select("bd", "tt")

    for row in df.collect():
        tt_value = row['tt']
        # With default reader options an empty CSV cell comes back as None,
        # and calling .split() on it would raise AttributeError.
        result[row['bd']] = tt_value.split(";") if tt_value is not None else []

    analysis()


if __name__ == '__main__':
    run()