12

I am using this example to upload a csv file into a sqlite database:

this is my code:

from numpy import genfromtxt
from time import time
from datetime import datetime
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

def Load_Data(file_name):
    data = genfromtxt(file_name, delimiter=',')# skiprows=1, converters={0: lambda s: str(s)})
    return data.tolist()

Base = declarative_base()

class cdb1(Base):
    #Tell SQLAlchemy what the table name is and if there's any table-specific arguments it should know about
    __tablename__ = 'cdb1'
    __table_args__ = {'sqlite_autoincrement': True}
    #tell SQLAlchemy the name of column and its attributes:
    id = Column(Integer, primary_key=True, nullable=False) 
    name = Column(VARCHAR(40))
    shack = Column(VARCHAR)
    db = Column(Integer)
    payments = Column(Integer)
    status = Column(VARCHAR)


if __name__ == "__main__":
    t = time()
    print 'creating database'

    #Create the database
    engine = create_engine('sqlite:///cdb.db')
    Base.metadata.create_all(engine)

    #Create the session
    session = sessionmaker()
    session.configure(bind=engine)
    s = session()

    try:
        file_name = 'client_db.csv'
        data = Load_Data(file_name)

        for i in data:
            record = cdb1(**{
                'name' : i[0],
                'shack' : i[1],
                'db' : i[2],
                'payments' : i[3],
                'status' : i[4]
            })
            s.add(record) #Add all the records

        s.commit() #Attempt to commit all the records
    except:
        s.rollback() #Rollback the changes on error
        print 'error in reading'
    finally:
        s.close() #Close the connection
    print "Time elapsed: " + str(time() - t) + " s." #0.091s

and this is the first few rows of the csv file:

Name,Shack,DB,Payments,Status
Loyiso Dwala,I156,13542,37,LightsOnly ON
Attwell Fayo,I157,13077,32,LightsON
David Mbhele,G25,13155,33,LightsON

The DB is created ok, but only some of the data is captured into the attributes: the 'payments' and 'db' column are populated correctly, but everything else comes out as NULL.

UPDATED CORRECT CODE (using pandas dataframe):

from numpy import genfromtxt
from time import time
from datetime import datetime
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import csv
import pandas as pd


#def Load_Data(file_name):
    #data = csv.reader(file_name, delimiter=',')# skiprows=1, converters={0: lambda s: str(s)})
    #return data.tolist()

Base = declarative_base()

class cdb1(Base):
    #Tell SQLAlchemy what the table name is and if there's any table-specific arguments it should know about
    __tablename__ = 'cdb1'
    __table_args__ = {'sqlite_autoincrement': True}
    #tell SQLAlchemy the name of column and its attributes:
    id = Column(Integer, primary_key=True, nullable=False) 
    Name = Column(VARCHAR(40))
    Shack = Column(VARCHAR)
    DB = Column(Integer)
    Payments = Column(Integer)
    Status = Column(VARCHAR)

engine = create_engine('sqlite:///cdb.db')
Base.metadata.create_all(engine)
file_name = 'client_db.csv'
df = pd.read_csv(file_name)
df.to_sql(con=engine, index_label='id', name=cdb1.__tablename__, if_exists='replace')
1

1 Answer 1

17

Are you familiar with Pandas Dataframe?

Really simple to use (and debug)

pandas.read_csv(file_name)

In [5]: pandas.read_csv('/tmp/csvt.csv')
Out[5]: 
           Name Shack     DB  Payments         Status
0  Loyiso Dwala  I156  13542        37  LightsOnly ON
1  Attwell Fayo  I157  13077        32       LightsON
2  David Mbhele   G25  13155        33       LightsON

For inserting the DataFrames data into a table, you can simply use pandas.DataFrame.to_sql

So your main code will end up looking something like this:

engine = create_engine('sqlite:///cdb.db')
Base.metadata.create_all(engine)

file_name = 'client_db.csv'
df = pandas.read_csv(file_name)
df.to_sql(con=engine, index_label='id', name=cdb1.__tablename__, if_exists='replace')

You should read further in the documentation link I added, and set the function Parameters as suits your purpose (specially look at - if_exists, index, index_label, dtype)

Sign up to request clarification or add additional context in comments.

6 Comments

I am aware of pandas dataframes... what would be the bext way to insert the dataframe into the database?
great, works perfectly. Have updated with updated code
Isn't this solution restricted only to small csv file/tables/dbs?
Downvoted, this is extremely inefficient and will take hours for large tables.
For very large tables, this approach will exhaust your memory and crash.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.