from pyspark.sql import SparkSession
# Obtain a SparkSession for this script: master is set to "local", and
# getOrCreate() hands back an already-active session when one exists,
# otherwise it builds a new one from the settings below.
# NOTE(review): "spark.some.config.option" / "some-value" looks like a
# placeholder setting — confirm whether it is needed.
spark = (
    SparkSession.builder
    .master("local")
    .appName("PySpark Create RDD example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)
# Build a four-row student DataFrame from in-memory tuples. Every field is
# passed as a string, including the roll numbers (which have leading zeros)
# and the percentages (which carry a literal '%').
Student = spark.createDataFrame(
    data=[
        ('009001', 'Anuj', '70%', 'B.tech(cs)'),
        ('009002', 'Sachin', '80%', 'B.tech(cs)'),
        ('008005', 'Yogesh', '94%', 'MCA'),
        ('007014', 'Ananya', '98%', 'MCA'),
    ],
    # Column names, in the same order as the fields of each tuple above.
    schema=['Roll_Num', 'Name', 'Percentage', 'Department'],
)

# Print the DataFrame contents as a text table on stdout.
Student.show()
Output:
+--------+------+----------+----------+
|Roll_Num|  Name|Percentage|Department|
+--------+------+----------+----------+
|  009001|  Anuj|       70%|B.tech(cs)|
|  009002|Sachin|       80%|B.tech(cs)|
|  008005|Yogesh|       94%|       MCA|
|  007014|Ananya|       98%|       MCA|
+--------+------+----------+----------+
==============================================================