User Tools

Site Tools


projects:digikey_partsdb

This is an old revision of the document!


digikey parts slurper

fetch www.digikey.com/product-search/en?FV=

grep for catfilterlink

remove beginning of line to inclusive

remove end of line from inclusive

produces following info
grabbing FV's

we need the FV's to crawl each subsection. grab all the above urls, make sure Results per Page = 500. The CSV download is capped at 500 results per fetch, so no point increasing this value.

  • <input type=hidden name=FV value=fff40000,fff80000>

also grab the total page count

  • <a class="Last" href="/product-search/en/undefined-category/undefined-family/0/page/8">Last</a>

The "page/8" at the end of the href is the total page count; pages start from 1.

grab the FV value and page count, and store for each of the above URL's

crawl individual pages

Use curl with a valid user agent. I used --user-agent "Chrome/1.0", but vary it to avoid rate limiters.

The response has 4 bytes at the front that we don't want, so strip them with a simple byteskip script or piece of code such as the following.

 

#include <stdio.h>
#include <stdlib.h>

/*
 * Byte-skip utility: copies infile to outfile, omitting the first `offset`
 * bytes of the input.
 *
 * Usage: <prog> infile outfile offset
 *   offset is parsed by strtoul with base 0, so decimal, 0x-prefixed hex and
 *   0-prefixed octal are accepted. It must be smaller than the input size.
 *
 * Return value (becomes the process exit status):
 *    0  success
 *   -1  too few arguments
 *   -2  input file could not be opened
 *   -3  input file is empty
 *   -4  input file size could not be determined, or seeking failed
 *   -5  offset is malformed or not inside the file
 *   -6  buffer allocation failed
 *   -7  input could not be read completely
 *   -8  output file could not be opened
 *   -9  output could not be written completely (including the final flush)
 */
int main(int argc,char*argv[])
{
	FILE *fp = NULL;
	FILE *ofp = NULL;
	unsigned char *buffer = NULL;
	char *parse_end = NULL;
	unsigned long length = 0;
	unsigned long offset = 0;
	unsigned long remaining = 0;
	long file_end = 0;
	int rc = 0;

	if( argc < 4 ) {
		/* argv[0] is not guaranteed to exist when argc is 0 */
		fprintf(stderr,"%s usage : infile outfile offset\n",
			argc > 0 ? argv[0] : "byteskip");
		return -1;
	}

	fp = fopen( argv[1], "rb");
	if( fp == NULL ) {
		fprintf(stderr,"Couldnt open input file %s\n",argv[1]);
		return -2;
	}

	/* Determine the input size; ftell reports failure as -1, so it has to
	 * be checked while still signed, before converting to unsigned. */
	if( fseek(fp,0,SEEK_END) != 0 || (file_end = ftell( fp )) < 0 ) {
		fprintf(stderr,"Couldnt determine size of input file %s\n",argv[1]);
		rc = -4;
		goto cleanup;
	}
	length = (unsigned long)file_end;

	if( length == 0 ) {
		fprintf(stderr,"zero length file %s\n",argv[1]);
		rc = -3;
		goto cleanup;
	}

	/* Parse the skip offset. strtoul silently accepts a leading '-' and
	 * returns 0 for non-numeric text, so reject both explicitly. */
	offset = strtoul (argv[3], &parse_end, 0);
	if( argv[3][0] == '-' || parse_end == argv[3] || *parse_end != '\0' ) {
		fprintf(stderr,"invalid offset %s\n",argv[3]);
		rc = -5;
		goto cleanup;
	}

	/* An out-of-range strtoul result (ULONG_MAX) is rejected here too. */
	if( offset >= length ){
		fprintf(stderr,"offset is  outside file length %s at %lu\n",argv[1], offset);
		rc = -5;
		goto cleanup;
	}

	/* offset < length <= LONG_MAX, so the cast cannot overflow */
	if( fseek(fp,(long)offset,SEEK_SET) != 0 ) {
		fprintf(stderr,"Couldnt seek to offset %lu in %s\n",offset,argv[1]);
		rc = -4;
		goto cleanup;
	}

	/* remaining is at least 1 because offset < length */
	remaining = length - offset;

	buffer = malloc( remaining );
	if( buffer == NULL ) {
		fprintf(stderr,"Couldnt allocate output buffer %lu\n", remaining );
		rc = -6;
		goto cleanup;
	}

	/* read everything after the skipped prefix in one go */
	if( fread(buffer,1,remaining,fp ) != remaining ) {
		fprintf(stderr,"Couldnt read input file %s\n", argv[1] );
		rc = -7;
		goto cleanup;
	}

	/* open output file for writing. */
	ofp = fopen( argv[2], "wb");
	if( ofp == NULL ) {
		fprintf(stderr,"Couldnt open output file %s\n",argv[2]);
		rc = -8;
		goto cleanup;
	}

	if( fwrite(buffer,1,remaining,ofp) != remaining ) {
		fprintf(stderr,"Couldnt write output file %s\n", argv[2]);
		rc = -9;
		goto cleanup;
	}

	/* fclose flushes buffered data, so a failure here means the output
	 * file is incomplete even though fwrite reported success. */
	if( fclose(ofp) != 0 ) {
		ofp = NULL;
		fprintf(stderr,"Couldnt write output file %s\n", argv[2]);
		rc = -9;
		goto cleanup;
	}
	ofp = NULL;

cleanup:
	/* single exit path: release whatever was acquired */
	free( buffer );
	if( ofp != NULL ) {
		fclose(ofp);
	}
	if( fp != NULL ) {
		fclose(fp);
	}

	return rc;
}
projects/digikey_partsdb.1381599763.txt.gz · Last modified: 2013/10/12 10:42 by charliex