07/04/02 19:56:25
>>181
import re
import urllib
# Field-separator pattern for one access-log line (the shapes matched look
# like Apache common/combined log format -- confirm against the real input).
# Alternatives are tried left to right, so the longer delimiters are listed
# before the bare quote / bare space fallbacks.  The pieces below are adjacent
# raw strings and concatenate into a single pattern.
rexp1 = re.compile(
    r"[ ][-][ ][-][ ][[]"  # ' - - [' : gap between the first field and the bracketed field
    r"|[]][ ]\""           # '] "'    : closing bracket followed by an opening quote
    r"|\"[ ]\""            # '" "'    : boundary between two quoted fields
    r"|\"[ ]"              # '" '     : closing quote followed by a space
    r"|[ ]\""              # ' "'     : space followed by an opening quote
    r"|\""                 # '"'      : a lone quote
    r"|[ ]"                # ' '      : a lone space
)
def _decode_segment(segment):
    """Percent-decode one URL path segment and return it as ``unicode``.

    UTF-8 is tried first because it is a strict encoding: bytes that are not
    UTF-8 almost always fail to decode.  GBK, by contrast, accepts many UTF-8
    byte sequences and silently yields mojibake, so it is only the fallback.

    Raises:
        UnicodeDecodeError: if the bytes are valid in neither encoding.
    """
    raw = urllib.unquote(segment)
    try:
        return unicode(raw, "utf-8")
    except UnicodeDecodeError:
        return unicode(raw, "gbk")


def convert(linein):
    """Turn one raw log line into a UTF-8 encoded, comma-separated line.

    This is Python 2 code (``unicode``, ``urllib.unquote``).

    The first nine delimiters matched by ``rexp1`` become commas, the result
    is split on ``","``, and the field at index 4 (treated as the request
    path) is removed, split on ``"/"``, percent-decoded segment by segment
    and appended after the remaining fields.

    Args:
        linein: one line of the log, as a byte string; a trailing line
            terminator is allowed.

    Returns:
        The converted record as a UTF-8 encoded byte string ending in "\n".

    Raises:
        IndexError: if the line yields fewer than five fields.
        UnicodeDecodeError: if a path segment is neither UTF-8 nor GBK.
    """
    # Remove the line terminator *before* stripping the closing quote.  Lines
    # read from a file end in "\n", so stripping '"' from the unmodified line
    # never removed anything and the quote stayed on the last field.
    line = linein.rstrip()
    record = rexp1.sub(",", line, 9).rstrip("\"").split(",")
    urlpaths = record.pop(4).split("/")
    record += [_decode_segment(part) for part in urlpaths]
    # NOTE(review): the non-path fields are still byte strings here; joining
    # them with unicode makes Python 2 decode them implicitly as ASCII, so
    # they are presumably expected to be plain ASCII -- confirm against input.
    out = u",".join([x.rstrip() for x in record]) + u"\n"
    return out.encode("utf-8")
# Stream every line of the input log through convert() and write the results
# to the CSV file.  Both handles are closed explicitly (try/finally keeps this
# valid on old Python 2 releases that lack the ``with`` statement) so the
# output is flushed to disk even if a line fails to convert.
source = open("112115.txt", "r")
try:
    target = open("112115.csv", "w")
    try:
        # Lazy generator: lines are converted one at a time while writing.
        converted = (convert(x) for x in source)
        target.writelines(converted)
    finally:
        target.close()
finally:
    source.close()