Heads up! This post was written 13 years ago. Some information might be outdated or may have changed since then.
import sys
import os
# Root folder of the unpacked blacklist. The trailing separator is kept
# because code in this file appends names to this value directly.
dir = r"C:\Users\Yuks\Desktop\BL" + "\\"
# Names of the SQL tables that the generated statements refer to.
categories_table, elements_table = "bl_categories", "bl_elements"
def ls(dir, hidden=False, relative=True):
    """Return the sorted entries of the directory *dir*.

    dir      -- directory to list.
    hidden   -- when false, entries whose name starts with '.' are left out.
    relative -- when true, bare entry names are returned; otherwise each
                name is joined onto *dir*.
    """
    entries = [
        name for name in os.listdir(dir)
        if hidden or not name.startswith('.')
    ]
    if not relative:
        entries = [os.path.join(dir, name) for name in entries]
    return sorted(entries)
def _sql_quote(value):
    """Escape *value* for use inside a single-quoted MySQL string literal."""
    return value.replace("\\", "\\\\").replace("'", "''")


def gen_items():
    """Print one INSERT statement per domain listed under the blacklist root.

    Every entry returned by ``ls(dir)`` is treated as a category name; its
    ``domains`` file is read line by line and each non-blank line becomes a
    row in the table named by ``elements_table``.  Entries whose ``domains``
    file cannot be opened are skipped, with a note written to stderr so the
    SQL on stdout stays clean.
    """
    template = "INSERT INTO `{el_cat}` (`cat_name`, `url`) VALUES ('{cat_name}', '{domain}');"
    for el in ls(dir):
        path = os.path.join(dir, el, "domains")
        try:
            fileobject = open(path)
        except (IOError, OSError) as err:
            # Best effort: not every entry has a readable "domains" file.
            sys.stderr.write("-- skipped %s: %s\n" % (path, err))
            continue
        cat_name = _sql_quote(el.strip())
        with fileobject:
            for line in fileobject:
                domain = line.strip()
                # Lines read from a file keep their newline, so emptiness
                # has to be checked after stripping.
                if not domain:
                    continue
                print(template.format(el_cat=elements_table,
                                      cat_name=cat_name,
                                      domain=_sql_quote(domain)))
def categories():
    """Print the (name, description) pairs found in the ``global_usage`` file.

    The file is looked up directly under the blacklist root ``dir``.  Lines
    containing ``#`` are ignored.  A ``NAME:`` line supplies a name and a
    ``DESC EN`` line supplies a description; as soon as both are non-empty
    they are stored as one record and the pair is reset.  After the file is
    read, each record is printed as a tuple.
    """
    path = os.path.join(dir, "global_usage")
    records = []
    name = desc = ""
    with open(path) as fileobject:
        for line in fileobject:
            if "#" in line:
                continue
            if "NAME:" in line:
                name = line.replace("NAME:", "").strip()
            elif "DESC EN" in line:
                # Also drop the colon that follows the tag; otherwise it
                # would stay at the front of the description.
                desc = line.replace("DESC EN", "").strip().lstrip(":").strip()
            if name and desc:
                records.append({'name': name, 'desc': desc})
                name = desc = ""
    for el in records:
        # Double parentheses print the tuple itself on Python 2 and 3 alike.
        print((el['name'], el['desc']))
        # NOTE(review): the original indentation was lost; the blank
        # separator line is assumed to follow every record - confirm.
        print("")
def gen_cats_sql_tables():
    """Return the CREATE TABLE statement for the categories table.

    The table name is taken from the module-level ``categories_table``.
    """
    ddl_lines = (
        " CREATE TABLE `{cat}` (",
        "`id` INT(10) NOT NULL AUTO_INCREMENT,",
        "`name` VARCHAR(500) NOT NULL,",
        "`desc` TEXT NOT NULL,",
        "PRIMARY KEY (`id`)",
        ") COLLATE='utf8_general_ci' ENGINE=InnoDB;",
        "",  # keeps the statement's trailing newline
    )
    return "\n".join(ddl_lines).format(cat=categories_table)
def gen_items_sql_tables():
    """Return the CREATE TABLE statement for the elements (URL) table.

    The table name is taken from the module-level ``elements_table``.
    """
    ddl_lines = (
        " CREATE TABLE `{elements}` (",
        "`id` INT(10) NOT NULL AUTO_INCREMENT,",
        "`cat_name` VARCHAR(500) NOT NULL,",
        "`url` TEXT NOT NULL,",
        "PRIMARY KEY (`id`)",
        ") COLLATE='utf8_general_ci' ENGINE=InnoDB;",
        "",  # keeps the statement's trailing newline
    )
    return "\n".join(ddl_lines).format(elements=elements_table)
# Emit the schema for both tables. A single-argument print(...) call
# produces the same output on Python 2 and Python 3.
print(gen_cats_sql_tables())
print(gen_items_sql_tables())
gen_items()

Файлът с лошите адреси може да се свали от http://www.shallalist.de/Downloads/shallalist.tar.gz

P.S. Би трябвало да може да работи и с http://urlblacklist.com/