I want to check for null values in Excel files that are spread across many sub-folders. I have written Python code that checks for null values in the Excel files in each and every folder, and the code works fine, but I need to simplify it.
import os ,uuidtre
import numpy as np
import pandas as pd
import logging
# Root folder whose sub-folders are scanned for Excel workbooks.
path = r"C:\Users\Documents\python\Emp"


def iter_excel_files(root):
    """Yield ``(file name, full path)`` for every ``.xlsx`` file under *root*.

    The whole tree is walked, so workbooks at any nesting depth are found.
    Entries that are not ``.xlsx`` files are ignored rather than being
    mistaken for folders. Folders and files are visited in sorted order so
    the resulting log is deterministic.

    Raises:
        OSError: if *root* or one of its sub-folders cannot be listed.
    """
    def fail(error):
        # os.walk swallows listing errors by default; re-raise so a missing
        # or unreadable folder is reported instead of silently skipped.
        raise error

    for folder, subfolders, names in os.walk(root, onerror=fail):
        # Sorting in place also fixes the order in which os.walk descends.
        subfolders.sort()
        for name in sorted(names):
            # Require the dot so names like "fooxlsx" are not treated as Excel.
            if name.endswith('.xlsx'):
                yield name, os.path.join(folder, name)


def null_status(count_nan):
    """Return the log message fragment matching a null-cell count."""
    if count_nan == 0:
        return ' No none value in the excel file'
    return ' There is none values founded in the excel file'


def check_excel_files(root):
    """Log, for each ``.xlsx`` file under *root*, whether it has null cells.

    Each workbook is loaded with ``pd.read_excel`` (which reads the first
    sheet by default) and the null cells of the resulting frame are counted.
    """
    for name, filepath in iter_excel_files(root):
        df = pd.read_excel(filepath, engine='openpyxl')
        # First sum counts nulls per column, second sum totals the columns.
        count_nan = df.isnull().sum().sum()
        # The record is deliberately a tuple so the log lines keep the same
        # shape as before; the path now always belongs to the file named.
        output = 'File name: ' + name, 'File path: ' + filepath, null_status(count_nan)
        logging.info(output)


if __name__ == "__main__":
    # Configure logging once, before any file is processed, so results are
    # recorded no matter which folder the first workbook lives in.
    logging.basicConfig(filename='nanTest.log', level=logging.INFO, format='%(message)s')
    check_excel_files(path)