Backup your del.icio.us account using Perl and LWP

This file uses the del.icio.us API to backup a given account’s bookmarks. It also creates an index of tag -> URL mappings, to be used by an automatic redirection script. Gzipped raw source code is here.


#!/usr/bin/env perl
# -*- coding: utf-8 -*- 
#
# Written by Aidan Kehoe, Wed Aug  9 22:56:17 CEST 2006. Public domain.

use strict;
use warnings;
use DB_File;
use Date::Parse qw/str2time/;
use Encode;
use Fcntl;
use File::Temp qw/tempfile tempdir/;
use HTTP::Request;
use LWP::UserAgent;
use XML::Parser qw/new parserfile/;

use constant {
    'TAG_DB_FILE_NAME' => 'YOU NEED TO SET THIS',
    # Where the backup of your del.icio.us entries is stored.
    'DELICIOUS_CURRENT_DUMP' => 'YOU NEED TO SET THIS TOO',
    'DELICIOUS_USER_NAME' => 'AND THIS',
    'DELICIOUS_PASSWORD' => 'AND THIS',
};

# Use the del.icio.us API to check if a bookmark update has been made since
# last backup; if it has, download it.
sub backup_delicious {
    my ($buffer, $ua, $response, $old_update, $new_update,
        $tempfile_name, $tempdir, $request);

    $ua = LWP::UserAgent->new( 'cookie_jar' => {},
                               'requests_redirectable' =>
                               [ 'GET', 'HEAD', 'POST' ],
                               'timeout' => 30,
                               'agent' =>
                               DELICIOUS_USER_NAME." del.icio.us backup script." );

    if (open DEL_FILE, DELICIOUS_CURRENT_DUMP) {
        sysread DEL_FILE, $buffer, 1024;
        close DEL_FILE;

        $buffer =~ /<posts update="([^"]+)" user="/;
        $old_update = str2time $1;
    } else {
        print "backup-del.icio.us.pl: could not open the old dump\n";
        $old_update = 0;
    }

    $request = new HTTP::Request GET =>
        'https://api.del.icio.us/v1/posts/update';
    $request->authorization_basic(DELICIOUS_USER_NAME, DELICIOUS_PASSWORD);
    $response = $ua->request($request);

    unless ($response->is_success) {
        # Failed; oh well, we'll get it tomorrow.
        print "backup-del.icio.us.pl: failed checking for updates: ".
            $response->status_line."\n";
        return undef;
    }

    $response->content =~ /<update time="([^"]+)" /;
    $new_update = str2time $1;

    if ($old_update >= $new_update) {
        return undef;
    }

    # Provide somewhere to download the posts to.
    $tempdir = tempdir( CLEANUP => 1);
    (undef, $tempfile_name) = tempfile( DIR => $tempdir);

    $request = new HTTP::Request GET =>
        'https://api.del.icio.us/v1/posts/all?';
    $request->authorization_basic(DELICIOUS_USER_NAME, DELICIOUS_PASSWORD);
    $response = $ua->request($request, $tempfile_name);

    unless ($response->is_success) {
        # Failed; oh well, we'll get it tomorrow.
        print "backup-del.icio.us.pl: failed downloading posts: ".
            $response->status_line."\n";
        return undef;
    }

    open(DEL_FILE, '>', DELICIOUS_CURRENT_DUMP) or
        print "backup-del.icio.us.pl: could not replace the old dump\n",
            return undef;
    open NEW_DEL_FILE, $tempfile_name or
        print "backup-del.icio.us.pl: could not open the new dump\n",
            return undef;

    print DEL_FILE $buffer while sysread NEW_DEL_FILE, $buffer, 8192;

    close NEW_DEL_FILE;
    close DEL_FILE;

    return 1;
}

sub rebuild_redirect_map {
    my %tag_hash;

    # Avoid duplicate entries in the hash.
    unlink TAG_DB_FILE_NAME;

    my $tag_db = tie %tag_hash, 'DB_File', TAG_DB_FILE_NAME,
                 O_RDWR|O_CREAT, 0644
        or die "Cannot open ".TAG_DB_FILE_NAME.": $!\n";

    my $xml_parser = new XML::Parser('Style' => 'Tree');
    my $tree = $xml_parser->parsefile(DELICIOUS_CURRENT_DUMP);
    my $expecting_posts_array = 0;
    my ($postinfo, $expecting_post_array, $postarr);

    for my $postsp (@{$tree}) {
        if ($postsp eq '0') {
            $expecting_posts_array = 0;
            next;
        }

        if ($postsp eq 'posts') {
            $expecting_posts_array = 1;
            next;
        }

        unless ($expecting_posts_array) {
            next;
        }

        $expecting_post_array = 0;

        for $postarr (@{$postsp}) {
            if ($postarr eq '0') {
                $expecting_post_array = 0;
                next;
            }

            if ($postarr eq 'post') {
                $expecting_post_array = 1;
                next;
            }

            unless ($expecting_post_array) {
                next;
            }

            $postinfo = shift @{$postarr};

            # This is broken in the abstract, I shouldn't need to do this if
            # I turn on the appropriate modules above (that is, "use open,"
            # and so on.).  Bah, Perl.
            #
            # Of course, it does mean it shouldn't do Unicode lowercase
            # handling, just ASCII. Yay, Perl.
            $postinfo->{'href'} = encode('utf8', $postinfo->{'href'});
            $postinfo->{'tag'} = encode('utf8', $postinfo->{'tag'});

            my @tags = split / /, $postinfo->{'tag'};

            for my $tag (@tags) {
                # We're not case sensitive.
                $tag = lc $tag;
                if (defined $tag_hash{$tag}) {
                    # No automatic serialisation--this is Perl!
                    $tag_hash{$tag} .= "\0".$postinfo->{'href'};
                } else {
                    $tag_hash{$tag} = $postinfo->{'href'};
                }
            }
        }
    }

    undef $tag_db;
    untie %tag_hash;
}

sub main {
    my ($dump_mtime, $db_mtime);

    backup_delicious;

    $dump_mtime = (stat DELICIOUS_CURRENT_DUMP)[9];
    $db_mtime = ((stat TAG_DB_FILE_NAME)[9] or 0);

    rebuild_redirect_map if $dump_mtime > $db_mtime;
}

main;

Aidan Kehoe
Last modified: Thu Aug 10 13:24:28 Westeuropäische Normalzeit 2006