За един проект ми трябваха различните "лоши" адреси от www.shallalist.de под формата на MySQL таблици. Това е набързо написан скрипт, който парсва списъците и генерира SQL, готов за импортиране:

import sys
import os

# Directory that is scanned for per-category sub-directories (each expected to
# hold a `domains` file) and for the `global_usage` file. The path ends with a
# separator.
# NOTE(review): this module-level name shadows the builtin `dir`.
dir = "C:\\Users\\Yuks\\Desktop\\BL\\"

# Table names substituted into the generated CREATE TABLE / INSERT statements.
categories_table = "bl_categories"
elements_table = "bl_elements"


def ls(dir, hidden=False, relative=True):
    """Return the sorted entries of the directory *dir*.

    Args:
        dir: Path of the directory to list.
        hidden: When false, entries whose name starts with ``.`` are left out.
        relative: When true, bare entry names are returned; otherwise every
            name is joined onto *dir* first.

    Returns:
        A new, sorted list of entry names (or joined paths).
    """
    # Filter on the bare name first; joining happens afterwards so the
    # dot-prefix test never sees the directory part.
    names = [
        entry for entry in os.listdir(dir)
        if hidden or not entry.startswith('.')
    ]
    if not relative:
        names = [os.path.join(dir, entry) for entry in names]
    return sorted(names)



def gen_items():
    """Print one ``INSERT`` statement per domain of every category.

    Every entry of the blacklist directory ``dir`` is treated as a category
    name and its ``domains`` file is read line by line.  Entries without a
    readable ``domains`` file are skipped.  Blank lines are ignored, and both
    the category name and the domain are escaped before being placed inside
    the single-quoted SQL literals.

    Returns:
        None.  The statements are written to standard output.
    """
    template = "INSERT INTO `{el_cat}` (`cat_name`, `url`) VALUES ('{cat_name}', '{domain}');"

    def quote(value):
        # Escape for a single-quoted MySQL string literal: backslashes first,
        # then the quote character itself, so the data cannot terminate the
        # literal early.
        # NOTE(review): assumes the server does not run in NO_BACKSLASH_ESCAPES mode.
        return value.replace("\\", "\\\\").replace("'", "''")

    for el in ls(dir):
        path = os.path.join(dir, el, "domains")
        try:
            fileobject = open(path)
        except (IOError, OSError):
            # The entry is a plain file or has no readable `domains` list;
            # skip it, but let every unrelated error surface.
            continue
        with fileobject:
            cat_name = quote(el.strip())
            for line in fileobject:
                domain = line.strip()
                # Lines keep their newline, so test the stripped value to
                # really drop blank lines.
                if not domain:
                    continue
                print(template.format(el_cat=elements_table,
                                      cat_name=cat_name,
                                      domain=quote(domain)))
        
        
def categories():
    """Print the name and English description of every listed category.

    Reads the ``global_usage`` file inside the blacklist directory ``dir``.
    Lines containing ``#`` are ignored.  A line containing ``NAME:`` sets the
    current name and a line containing ``DESC EN`` sets the current
    description; as soon as both are non-empty the pair is recorded and both
    are reset.  Finally each recorded pair is printed as a tuple followed by
    an empty line.

    Returns:
        None.  The pairs are written to standard output.

    Raises:
        IOError: If the ``global_usage`` file cannot be opened.
    """
    path = os.path.join(dir, "global_usage")
    r = []
    with open(path) as fileobject:
        name = ""
        desc = ""
        for line in fileobject:
            if "#" not in line:
                if "NAME:" in line:
                    name = line.replace("NAME:", "").strip()
                elif "DESC EN" in line:
                    # The NAME branch shows that keys are followed by a colon;
                    # drop it here as well so it does not leak into the text.
                    desc = line.replace("DESC EN", "").strip().lstrip(":").strip()

            if name and desc:
                r.append({'name': name, 'desc': desc})
                name = desc = ""

    for el in r:
        # Single-argument print calls give the same output under the Python 2
        # print statement and the Python 3 print function.
        print((el['name'], el['desc']))
        print("")

def gen_cats_sql_tables():
    """Return the ``CREATE TABLE`` statement for the categories table.

    The table name is taken from the module-level ``categories_table``.  The
    returned text starts with a newline and ends with a newline followed by
    four spaces.
    """
    ddl_lines = (
        "",
        "CREATE TABLE `{cat}` (",
        "    `id` INT(10) NOT NULL AUTO_INCREMENT,",
        "    `name` VARCHAR(500) NOT NULL,",
        "    `desc` TEXT NOT NULL,",
        "    PRIMARY KEY (`id`)",
        ")",
        "COLLATE='utf8_general_ci'",
        "ENGINE=InnoDB;",
        "    ",
    )
    return "\n".join(ddl_lines).format(cat=categories_table)

def gen_items_sql_tables():
    """Return the ``CREATE TABLE`` statement for the elements table.

    The table name is taken from the module-level ``elements_table``.  The
    returned text starts with a newline and ends with a newline followed by
    four spaces.
    """
    ddl_lines = (
        "",
        "CREATE TABLE `{elements}` (",
        "    `id` INT(10) NOT NULL AUTO_INCREMENT,",
        "    `cat_name` VARCHAR(500) NOT NULL,",
        "    `url` TEXT NOT NULL,",
        "    PRIMARY KEY (`id`)",
        ")",
        "COLLATE='utf8_general_ci'",
        "ENGINE=InnoDB;",
        "    ",
    )
    return "\n".join(ddl_lines).format(elements=elements_table)

 
# Emit the schema first, then the data rows, so the output can be imported
# in one pass.
print(gen_cats_sql_tables())
print(gen_items_sql_tables())
# gen_items() prints its INSERT statements itself.
gen_items()
Файлът с лошите адреси може да се свали от http://www.shallalist.de/Downloads/shallalist.tar.gz
P.S. Скриптът би трябвало да работи и с http://urlblacklist.com/