1

The TensorFlow.js CNN example is nice, so I decided to train it with my own custom character images (local images like this imgur; they are also available as browser img elements). However, I can't replicate the test because the example's code uses preprocessed image data.

I copied the example from here (https://github.com/tensorflow/tfjs-examples/blob/master/mnist-node/README.md) and added the required Node.js packages. The example ran successfully. But I realized that I can't change the data the example is using, because it loads preprocessed data as shown below.

// Remote location of the MNIST dataset files named below
// (presumably prefixed to each file name when downloading — confirm in data.js).
const BASE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/';
// Training split: image file (idx3-ubyte) and matching label file (idx1-ubyte).
const TRAIN_IMAGES_FILE = 'train-images-idx3-ubyte';
const TRAIN_LABELS_FILE = 'train-labels-idx1-ubyte';
// Test split: image file (idx3-ubyte) and matching label file (idx1-ubyte).
const TEST_IMAGES_FILE = 't10k-images-idx3-ubyte';
const TEST_LABELS_FILE = 't10k-labels-idx1-ubyte';

I made images in the same format as MNIST (28*28), so I thought I could just swap in my own train and test data, but I failed because I don't know what the idx3-ubyte format is. The data.js file's URL is here.

How can I generate the same ubyte files? Or how can I use local images or img elements directly?

Update: I examined the reading part of the data.js file and managed to generate the same file format. It also has header values.

 
/**
 * Loads an image file and decodes it into an array of per-image pixel arrays.
 *
 * The header is checked against the expected magic number, image height and
 * image width. Every byte after the header is read as an unsigned 8-bit pixel
 * and scaled from the 0-255 interval into the 0-1 interval.
 *
 * @param {string} filename - File name passed to fetchOnceAndSaveToDiskWithBuffer.
 * @returns {Promise<Float32Array[]>} One Float32Array of
 *     IMAGE_HEIGHT * IMAGE_WIDTH values per image.
 * @throws If a header assertion fails or the decoded image count does not
 *     match the count stored in the header.
 */
async function loadImages(filename) {
  const buffer = await fetchOnceAndSaveToDiskWithBuffer(filename);
  const pixelsPerImage = IMAGE_HEIGHT * IMAGE_WIDTH;

  const header = loadHeaderValues(buffer, IMAGE_HEADER_BYTES);
  assert.equal(header[0], IMAGE_HEADER_MAGIC_NUM);
  assert.equal(header[2], IMAGE_HEIGHT);
  assert.equal(header[3], IMAGE_WIDTH);

  const images = [];
  // Walk the payload one image record at a time, starting right after the header.
  for (let start = IMAGE_HEADER_BYTES; start < buffer.byteLength;
       start += pixelsPerImage) {
    const pixels = new Float32Array(pixelsPerImage);
    for (let p = 0; p < pixelsPerImage; p++) {
      // Scale each byte from 0-255 down to 0-1.
      pixels[p] = buffer.readUInt8(start + p) / 255;
    }
    images.push(pixels);
  }

  // header[1] holds the number of images the file claims to contain.
  assert.equal(images.length, header[1]);
  return images;
}

/**
 * Loads a label file and decodes it into an array of per-example label arrays.
 *
 * The header's magic number is checked, then every byte after the header is
 * read as an unsigned 8-bit value and grouped into records of
 * LABEL_RECORD_BYTE entries.
 *
 * @param {string} filename - File name passed to fetchOnceAndSaveToDiskWithBuffer.
 * @returns {Promise<Int32Array[]>} One Int32Array of LABEL_RECORD_BYTE values
 *     per label record.
 * @throws If the magic-number assertion fails or the decoded label count does
 *     not match the count stored in the header.
 */
async function loadLabels(filename) {
  const buffer = await fetchOnceAndSaveToDiskWithBuffer(filename);
  const valuesPerLabel = LABEL_RECORD_BYTE;

  const header = loadHeaderValues(buffer, LABEL_HEADER_BYTES);
  assert.equal(header[0], LABEL_HEADER_MAGIC_NUM);

  const labels = [];
  // Walk the payload one label record at a time, starting right after the header.
  for (let start = LABEL_HEADER_BYTES; start < buffer.byteLength;
       start += valuesPerLabel) {
    const record = new Int32Array(valuesPerLabel);
    for (let k = 0; k < valuesPerLabel; k++) {
      record[k] = buffer.readUInt8(start + k);
    }
    labels.push(record);
  }

  // header[1] holds the number of labels the file claims to contain.
  assert.equal(labels.length, header[1]);
  return labels;
}

 
 getData_(isTrainingData) {
    let imagesIndex;
    let labelsIndex;
    if (isTrainingData) {
      imagesIndex = 0;
      labelsIndex = 1;
    } else {
      imagesIndex = 2;
      labelsIndex = 3;
    }
    const size = this.dataset[imagesIndex].length;
    tf.util.assert(
        this.dataset[labelsIndex].length === size,
        `Mismatch in the number of images (${size}) and ` +
            `the number of labels (${this.dataset[labelsIndex].length})`);

    // Only create one big array to hold batch of images.
    const imagesShape = [size, IMAGE_HEIGHT, IMAGE_WIDTH, 1];
    const images = new Float32Array(tf.util.sizeFromShape(imagesShape));
    const labels = new Int32Array(tf.util.sizeFromShape([size, 1]));

    let imageOffset = 0;
    let labelOffset = 0;
    for (let i = 0; i < size; ++i) {
      images.set(this.dataset[imagesIndex][i], imageOffset);
      labels.set(this.dataset[labelsIndex][i], labelOffset);
      imageOffset += IMAGE_FLAT_SIZE;
      labelOffset += 1;
    }

    return {
      images: tf.tensor4d(images, imagesShape),
      labels: tf.oneHot(tf.tensor1d(labels, 'int32'), LABEL_FLAT_SIZE).toFloat()
    };
  }
}

Below is the generator code.

const {createCanvas, loadImage} = require('canvas');
const tf = require('@tensorflow/tfjs');
require('@tensorflow/tfjs-node');

const fs = require('fs');

const util = require('util');

// const writeFile = util.promisify(fs.writeFile);
// const readFile = util.promisify(fs.readFile);

/**
 * Generates a pair of MNIST-style IDX files from local glyph images.
 *
 * Each glyph PNG is rasterized onto a 28x28 canvas and reduced to one byte per
 * pixel. The images are then repeated according to `labels` and written to
 * './testGeneratedImageBuffer' (16-byte header + 784 bytes per image) and
 * './testGeneratedLabelBuffer' (8-byte header + 1 byte per label).
 *
 * Any failure is logged and turns into a non-zero exit code instead of an
 * unhandled promise rejection.
 */
(async () => {
  const IMAGE_SIDE = 28;
  const imageHeaderBytes = 16;
  const imageRecordBytes = IMAGE_SIDE * IMAGE_SIDE;
  const labelHeaderBytes = 8;
  const labelRecordBytes = 1;

  const canvas = createCanvas(IMAGE_SIDE, IMAGE_SIDE);
  const ctx = canvas.getContext('2d');

  // Index in this list is the class id used in `labels` below.
  const glyphPaths = ['./u.png', './q.png', './r.png', './c.png', './z.png'];
  const glyphs = await Promise.all(glyphPaths.map((path) => loadImage(path)));

  console.log(glyphs[0]);

  const buffers = [];
  for (const glyph of glyphs) {
    // Wipe the previous glyph first; otherwise its pixels remain visible
    // through any transparent area of the next image.
    ctx.clearRect(0, 0, IMAGE_SIDE, IMAGE_SIDE);
    ctx.drawImage(glyph, 0, 0);

    const pixels = tf.fromPixels(canvas, 1);
    // Buffer.from on a typed array copies element values (truncated to a
    // byte each), giving one byte per pixel.
    const bytes = Buffer.from(await pixels.data());
    pixels.dispose();

    if (bytes.byteLength !== imageRecordBytes) {
      throw new Error(
          `Expected ${imageRecordBytes} bytes per image but got ${bytes.byteLength}`);
    }
    buffers.push(bytes);
  }

  const labels = [0, 1, 3, 2, 4, 0, 1, 2, 1, 0, 3, 0, 2, 3, 4, 0];
  const size = labels.length;

  const imageBuffer = Buffer.alloc(imageHeaderBytes + size * imageRecordBytes);
  const labelBuffer = Buffer.alloc(labelHeaderBytes + size * labelRecordBytes);

  // Header data is stored in-order (aka big-endian), four bytes per value.
  const imageHeaderValues = [2051, size, IMAGE_SIDE, IMAGE_SIDE];
  const labelHeaderValues = [2049, size];
  imageHeaderValues.forEach((value, i) => {
    imageBuffer.writeUInt32BE(value, i * 4);
  });
  labelHeaderValues.forEach((value, i) => {
    labelBuffer.writeUInt32BE(value, i * 4);
  });

  labels.forEach((label, i) => {
    // Record i of each file sits right after the header at a fixed stride.
    buffers[label].copy(imageBuffer, imageHeaderBytes + i * imageRecordBytes);
    labelBuffer.writeUInt8(label, labelHeaderBytes + i * labelRecordBytes);
  });

  fs.writeFileSync('./testGeneratedImageBuffer', imageBuffer);
  fs.writeFileSync('./testGeneratedLabelBuffer', labelBuffer);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

1 Answer 1

2

"ubyte" stands for "unsigned byte". It refers to an unsigned 8-bit integer. Each of the two images-ubyte* files contains a series of unsigned 8-bit integers. Every integer is a pixel in an MNIST image and has a value >=0 and <=255.

That's how the images are represented at the pixel level. Now let's take a look at the level of a whole image, consisting of 28 rows and 28 columns. It takes 28 * 28 = 784 such integers to represent an image. In the file, they are organized in a way such that the first 28 integers correspond to the first row, the next 28 integers correspond to the second row and so forth.

All the images in the dataset are represented this way and their integers are concatenated to form the content of an image-ubyte file. Why are there two such files? This is because train-images-idx3-ubyte is the training dataset and t10k-images-idx3-ubyte is the test dataset.

The other two files (labels-ubyte) are the labels for the MNIST images. Like the image-ubyte files, they contain uint8 values (i.e., unsigned 8-bit integers). But instead of holding values from 0-255, the label files have values >=0 and <=9, because there are only 10 image classes in the MNIST dataset.

Hope this is clear.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.