0

I have a huge dataset (11 GB, 19 million rows) and am loading it into PostgreSQL using the following script:

import csv
from datetime import date
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Declarative base class that the ORM model defined below subclasses.
Base = declarative_base()

from sqlalchemy import Column, Integer, String, DateTime, Float, Boolean
class Complaint(Base):
    """ORM model for one row of the ``all_complaints`` table.

    The string, date and coordinate columns hold values copied from the
    source CSV. The boolean columns (``bedbug``, ``water``, ``noise``,
    ``heat``, ``pests``) are category flags set by the loading code.
    """

    __tablename__ = 'all_complaints'

    index = Column(Integer, primary_key=True)
    created_date = Column(DateTime)
    closed_date = Column(DateTime)
    complaint_type = Column(String)
    descriptor = Column(String)
    location_type = Column(String)
    incident_zip = Column(Integer)
    incident_address = Column(String)
    address_type = Column(String)
    city = Column(String)
    borough = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    bedbug = Column(Boolean)
    water = Column(Boolean)
    noise = Column(Boolean)
    heat = Column(Boolean)
    pests = Column(Boolean)

    def __repr__(self):
        """Return a short debug representation of this complaint."""
        # The label previously read "User", which did not match this class.
        return "<Complaint(index='%s', created_date='%s', incident_zip='%s')>" % (
            self.index, self.created_date, self.incident_zip)

    def addZip(self, zip):  # parameter name kept as-is for existing callers
        """Parse ``zip`` as an integer and store it in ``incident_zip``.

        Some zip values in the CSV file are not numeric strings, so parsing
        can fail.

        Returns:
            True if the value was parsed and stored, False otherwise (in
            which case ``incident_zip`` is left unchanged).
        """
        try:
            self.incident_zip = int(zip)
        except (TypeError, ValueError):
            # ValueError: non-numeric text. TypeError: a missing value such
            # as None, which csv.DictReader yields for short rows.
            return False
        return True

# Connection settings; both are blank here and must be filled in before use.
dbname = ''
username = ''
# Use the "postgresql" dialect name. The "postgres" alias was deprecated and
# has been removed from newer SQLAlchemy releases, where it fails to load.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname), echo=False)

# Emit CREATE TABLE for the models declared on Base.
Base.metadata.create_all(engine)


# Session factory bound to the engine, plus the one session used by the loader.
Session = sessionmaker(bind=engine)
session = Session()


# Number of accepted rows to accumulate before each commit.
BATCH_SIZE = 1000

# CSV columns that must be non-empty for a row to be loaded.
REQUIRED_FIELDS = (
    'Created Date',
    'Location Type',
    'Incident Address',
    'Address Type',
    'City',
    'Borough',
)

pending = 0  # rows added to the session since the last commit

# newline='' lets the csv module handle line endings itself, so quoted fields
# containing embedded newlines are parsed correctly.
with open("311_Service_Requests_from_2010_to_Present.csv", newline='') as f:
    reader = csv.DictReader(f)
    for i, row in enumerate(reader):
        # Skip rows that are missing any mandatory text field.
        if any(row[field] == '' for field in REQUIRED_FIELDS):
            continue

        # Both coordinates must be numeric. The original code tested latitude
        # twice and never validated longitude.
        try:
            latitude = float(row['Latitude'])
            longitude = float(row['Longitude'])
        except ValueError:
            continue

        complaint_type = row['Complaint Type']
        closed_date = row['Closed Date']

        complaint = Complaint(
            index=i,
            created_date=row['Created Date'],
            # An empty closed date is stored as NULL.
            closed_date=None if closed_date == '' else closed_date,
            complaint_type=complaint_type,
            descriptor=row['Descriptor'],
            location_type=row['Location Type'],
            incident_address=row['Incident Address'],
            address_type=row['Address Type'],
            city=row['City'],
            borough=row['Borough'],
            latitude=latitude,
            longitude=longitude,
            bedbug=False,
            water=False,
            # `in` already yields a bool. Writing `'Noise' in s == True` is a
            # chained comparison, `('Noise' in s) and (s == True)`, which is
            # always False for a string.
            noise='Noise' in complaint_type,
            heat=False,
            pests=False,
        )

        # Some zips in the CSV are not numeric; skip those rows.
        if not complaint.addZip(row['Incident Zip']):
            continue

        session.add(complaint)
        pending += 1

        # Commit by number of accepted rows. Keying on the raw row index
        # skipped the commit whenever row i % 1000 == 0 was filtered out.
        if pending >= BATCH_SIZE:
            session.commit()
            pending = 0

# Commit the final partial batch so the trailing rows are not lost.
if pending:
    session.commit()

I got the columns I want from my csv into the database using this code, but now I want to update my 'noise' column (and eventually bedbug, water, heat, and pests) to be true if the complaint_type column contains 'Noise' in the string.

I've tried:

            # NOTE: comparison operators chain, so this condition is evaluated as
            # ('Noise' in complaint.complaint_type) and (complaint.complaint_type == True),
            # which is False for any string value.
            if 'Noise' in complaint.complaint_type == True:
                complaint.noise = True

which doesn't update any of the fields (even though the condition evaluates to true when I check the string externally). Any ideas on how to code this in Python 3?

1 Answer 1

1

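The trailing "== True" is the problem. Python chains comparison operators, so `'Noise' in s == True` is evaluated as `('Noise' in s) and (s == True)`, and the second half is always false for a string. Remove the "== True" and it will work as you intend.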

# The `in` operator already yields a bool, so no comparison against True is needed.
if 'Noise' in complaint.complaint_type:
    complaint.noise = True
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.