Hi, I am very new to PySpark. I have not written any PySpark code before, so I need help running a SQL query in PySpark using Python.
Can you please tell me how to create a DataFrame, then create a view from it, and run a SQL query on top of it? Which modules are required to run the query? Can you please show me how to run it?
The data comes from the file TERR.txt.
sql query:
-- Lists every row of outbound.terr (alias a) together with the name of its
-- parent row (alias b, the same table matched on a.parnt_terr = b.id), plus
-- two derived labels, DEPARTMENT and FRANCHISE, computed from the leading
-- characters of a.nm.
SELECT
    a.id         AS nmitory_id,
    a.dscrptn    AS nmitory_desc,
    a.nm         AS terr_nm,
    a.pstn_type,
    a.parnt_terr AS parnt_nm_id,
    b.nm         AS parnt_terr_nm,
    a.start_dt,
    a.type,

    -- DEPARTMENT: chosen from the first six characters of a.nm; the two
    -- Canada branches look only at the first three characters. Branches are
    -- evaluated top to bottom and anything unmatched becomes 'Unspecified'.
    CASE
        WHEN SUBSTR(a.nm, 1, 6) IN (
            '105-30', '105-31', '105-32',
            '105-41', '105-42', '105-43',
            '200-CD', '200-CG', '200-CO',
            '200-CP', '200-CR', '200-DG'
        ) THEN 'JBI'
        WHEN SUBSTR(a.nm, 1, 6) IN (
            '100-SC',
            '105-05', '105-06', '105-07', '105-08',
            '105-13',
            '105-71', '105-72', '105-73'
        ) THEN 'JP'
        WHEN SUBSTR(a.nm, 1, 6) = '103-16' THEN 'JT'
        WHEN SUBSTR(a.nm, 1, 6) IN (
            '105-51',
            '200-HA', '200-HF', '200-HT',
            '105-HT'
        ) THEN 'JSA'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-61', '200-PR') THEN 'PR'
        WHEN SUBSTR(a.nm, 1, 3) = '302' THEN 'Canada - MEM'
        WHEN SUBSTR(a.nm, 1, 3) = '301' THEN 'Canada - MSL'
        ELSE 'Unspecified'
    END AS DEPARTMENT,

    -- FRANCHISE: chosen from the first six characters of a.nm; anything
    -- unmatched becomes 'Unspecified'.
    -- NOTE(review): '200-HA' is mapped to 'JSA' in DEPARTMENT but has no branch
    -- here, so it yields 'Unspecified' -- confirm that this is intended.
    CASE
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-06', '105-07', '105-08') THEN 'CVM MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('100-SC', '105-13')           THEN 'CVM CSS'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-41', '200-CD')           THEN 'Derm MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-42', '200-CG')           THEN 'Gastro MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '105-31'                        THEN 'Heme Onc MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '200-DG'                        THEN 'Imm MD'
        WHEN SUBSTR(a.nm, 1, 6) = '103-16'                        THEN 'ID MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '200-CP'                        THEN 'Imm Ops'
        WHEN SUBSTR(a.nm, 1, 6) IN (
            '105-05', '105-71', '105-72', '105-73'
        )                                                         THEN 'Neuro MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-30', '200-CO')           THEN 'Onc MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-61', '200-PR')           THEN 'Puerto Rico MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-43', '200-CR')           THEN 'Rheum MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('105-51', '200-HF')           THEN 'RWVE Field'
        WHEN SUBSTR(a.nm, 1, 6) = '105-32'                        THEN 'Solid Tumor MSL'
        WHEN SUBSTR(a.nm, 1, 6) IN ('200-HT', '105-HT')           THEN 'RWVE Pop Health'
        -- Canada, 301-xx codes (labels ending in MSL)
        WHEN SUBSTR(a.nm, 1, 6) = '301-PC' THEN 'Canada - PC MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-VR' THEN 'Canada - VR/ONC MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-SO' THEN 'Canada - Hematology (Myeloid) MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-ON' THEN 'Canada - Hematology (Lymphoid) MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-IP' THEN 'Canada - CNS MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-RD' THEN 'Canada - Rheum MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-IB' THEN 'Canada - Gastro MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-DE' THEN 'Canada - Derm MSL'
        WHEN SUBSTR(a.nm, 1, 6) = '301-SE' THEN 'Canada - Biologics MSL'
        -- Canada, 302-xx codes (labels ending in MEM)
        WHEN SUBSTR(a.nm, 1, 6) = '302-PC' THEN 'Canada - PC MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-VR' THEN 'Canada - VR/ONC MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-SO' THEN 'Canada - Hematology (Myeloid) MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-ON' THEN 'Canada - Hematology (Lymphoid) MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-IP' THEN 'Canada - CNS MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-RD' THEN 'Canada - Rheum MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-IB' THEN 'Canada - Gastro MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-DE' THEN 'Canada - Derm MEM'
        WHEN SUBSTR(a.nm, 1, 6) = '302-SE' THEN 'Canada - Biologics MEM'
        ELSE 'Unspecified'
    END AS FRANCHISE
FROM outbound.terr a
-- Self-join to pick up the parent row's name; LEFT JOIN keeps rows of a that
-- have no matching parent, in which case parnt_terr_nm is NULL.
LEFT JOIN outbound.terr b
    ON a.parnt_terr = b.id