RDD Creation by Reading from a Text File ("Read from file")

# Read a local text file into an RDD; each element is one line of the file.
# NOTE: `sc` (the SparkContext) must already exist in this session, and the
# local Windows path is only reachable when Spark runs in local mode.
text_rdd=sc.textFile("C:/Users/mhtpr/Documents/sample.txt")


# Inspect the object type returned by sc.textFile (output below: pyspark.rdd.RDD).
type(text_rdd)
pyspark.rdd.RDD


# Action: count the elements (lines) in the RDD — 16 lines in this file.
text_rdd.count()
16


# Action: pull every element back to the driver as a Python list of strings.
# Only safe for small data — collect() materializes the whole RDD in driver memory.
text_rdd.collect()
['Troubleshooting',
 '',
 'Problem',
 'I have a notebook in which I add:',
 '',
 'sparkSession=sparkSession(sc).builder.getOrCreate()',
 '',
 'I get:',
 '',
 'Name Error: sc is not defined',
 '',
 'Diagnosing The Problem',
 'User had imported a custom Hive JDBC jar.',
 '',
 'Resolving The Problem',
 'Navigate to the Admin Dashboard, and under scripts, select the script for adding/removing custom JDBC drivers and jars and remove the Hive JDBC jar. Restart the notebook environment and Insert-to-Code for Spark DataFrame should work fine afterwards.']




# glom() coalesces the elements of each partition into a single list, so
# collect() returns one inner list per partition. The output below has a
# single inner list, showing this RDD landed in one partition.
text_rdd.glom().collect()
[['Troubleshooting',
  '',
  'Problem',
  'I have a notebook in which I add:',
  '',
  'sparkSession=sparkSession(sc).builder.getOrCreate()',
  '',
  'I get:',
  '',
  'Name Error: sc is not defined',
  '',
  'Diagnosing The Problem',
  'User had imported a custom Hive JDBC jar.',
  '',
  'Resolving The Problem',
  'Navigate to the Admin Dashboard, and under scripts, select the script for adding/removing custom JDBC drivers and jars and remove the Hive JDBC jar. Restart the notebook environment and Insert-to-Code for Spark DataFrame should work fine afterwards.']]
 


===============================================================================