# MAGIC * `degrees`: Total connections to and from the airport # MAGIC * `outDegrees`: Outgoing connections from the airport # MAGIC * `inDegrees`: Incoming connections to the airport # States with the longest cumulative delays (with individual delays > 100 minutes) (origin: Seattle) display( tripGraph. # MAGIC %md # What destinations tend to have significant delays departing from SEA # After displaying tripDelays, use Plot Options to set `state_dst` as a Key. # MAGIC %md # What destinations tend to have delays # MAGIC %md # What flights departing SFO are most likely to have significant delays display( tripGraph. print "On-time / Early Flights: %d" % tripGraph. # Determining number of on-time / early flights vs. # MAGIC %md # Determining the number of delayed vs. # MAGIC %md # Determining the longest delay in this dataset # MAGIC %md # Determine the number of airports and trips # MAGIC Let's start with a set of simple graph queries to understand flight performance and departure delays tripGraphPrime = GraphFrame( tripVertices, tripEdgesPrime) # This graphframe contains a smaller subset of data to make it easier to display motifs and subgraphs (below) tripGraph = GraphFrame( tripVertices, tripEdges) # This GraphFrame builds upon the vertices and edges based on our trips (flights) # The edges of our graph are the flights between airports # The vertices of our graph are the airports # Create Vertices (airports) and Edges (flights) # Note, ensure you have already installed the GraphFrames spark-package # MAGIC %md **WARNING:** If the graphframes package, required in the cell below, is not installed, follow the instructions (). # MAGIC These are required naming conventions for vertices and edges in GraphFrames as of the time of this writing (Feb. # MAGIC * Start and End airports to **src** and **dst** for the Edges Table (flights) # MAGIC * Rename IATA airport code to **id** in the Vertices Table What's awesome about GraphFrames is that this process is incredibly simple. 
We are going to build the structure of the vertices (or nodes) and we're going to build the structure of the edges. # MAGIC Now that we've imported our data, we're going to need to build our graph. registerTempTable( "departureDelays_geo") sql( "select cast(f.date as int) as tripid, cast(concat(concat(concat(concat(concat(concat('2014-', concat(concat(substr(cast(f.date as string), 1, 2), '-')), substr(cast(f.date as string), 3, 2)), ' '), substr(cast(f.date as string), 5, 2)), ':'), substr(cast(f.date as string), 7, 2)), ':00') as timestamp) as `localdate`, cast(f.delay as int), cast(f.distance as int), f.origin as src, f.destination as dst, o.city as city_src, d.city as city_dst, o.state as state_src, d.state as state_dst from departuredelays f join airports o on o.iata = f.origin join airports d on d.iata = f.destination") departureDelays_geo. # Obtain key attributes such as Date of flight, delays, distance, and airport information (Origin, Destination) departureDelays_geo = sqlContext. sql( "select f.IATA, f.City, f.State, f.Country from airports_na f join tripIATA t on t.IATA = f.IATA") # Only include airports with at least one trip from the departureDelays dataset airports = sqlContext. sql( "select distinct iata from (select distinct origin as iata from departureDelays union all select distinct destination as iata from departureDelays) a") # Available IATA codes from the departuredelays sample dataset load( tripdelaysFilePath) departureDelays. registerTempTable( "airports_na") departureDelays = sqlContext. options( header = 'true', inferschema = 'true', delimiter = ' \t'). tripdelaysFilePath = "/databricks-datasets/flights/departuredelays.csv" airportsnaFilePath = "/databricks-datasets/flights/airport-codes-na.txt" airportsna = sqlContext. 
# MAGIC Extract the Airports and Departure Delays information from S3 / DBFS # MAGIC * Note, the data used here was extracted from the US DOT:BTS between and * # MAGIC This notebook provides an analysis of On-Time Flight Performance and Departure Delays data using GraphFrames for Apache Spark. # MAGIC %md # On-Time Flight Performance with GraphFrames for Apache Spark # MAGIC *If you see !() at the top-left or top-right, click on the link to import this notebook in order to run it.* # Databricks notebook source exported at Mon, 16:02:51 UTC
0 Comments
Leave a Reply. |