今天写代码时,程序一直编译不过去,最后才发现是配对符(括号)写丢了一个:
import sys
import re
import codecs


class TestCls(object):
    """Tiny regex-based reader for ``CREATE TABLE IF NOT EXISTS`` statements.

    Limitations (all follow directly from the patterns below):
      * only ``--`` line comments are recognised and stripped;
      * a table definition must start with ``CREATE TABLE IF NOT EXISTS``
        and end with ``);``;
      * a column line is only recognised when it carries ``NULL`` or
        ``NOT NULL`` after the type, i.e.
        ``colName colType NULL|NOT NULL [anything else]``.
    """

    # "--" starts a comment that runs to the end of the line.  Matching
    # "everything except CR/LF" also removes a comment on the very last line
    # of the file, which has no trailing newline to look ahead to.
    _COMMENT_PATTERN = re.compile(r"--[^\r\n]*")

    # Raw strings: "\(" in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    _TABLE_PATTERN = re.compile(
        r"CREATE TABLE IF NOT EXISTS[ \t]+"
        r"(?P<tableName>[a-zA-Z0-9_]+)"
        r"[ \t\r\n]*\("
        r"(?P<allFieldContent>[^;]+)"
        r"\);"
    )

    # otherStr stops at CR/LF so that a "\r" from a CRLF file never ends up
    # in the captured text (codecs.open does not translate newlines).
    _FIELD_PATTERN = re.compile(
        r"[ \t]*(?P<colName>[a-zA-Z0-9_]+)"
        r"[ \t]+(?P<colType>[a-zA-Z0-9()]+)"
        r"[ \t]+(?:NULL|NOT NULL)"
        r"(?P<otherStr>[^\r\n]*)"
    )

    def __init__(self):
        return None

    def _ParseTables(self, content):
        """Extract table and column definitions from SQL text.

        Args:
            content: The SQL script as a single string.

        Returns:
            A list of ``(tableName, fields)`` tuples in file order, where
            ``fields`` is a list of ``(colName, colType, otherStr)`` tuples.
            The list is empty when no table definition is found, and
            ``fields`` is empty when a table has no recognisable column line.
        """
        # Comments must go first: a ";" or ")" inside a comment would
        # otherwise confuse the table pattern.
        content = self._COMMENT_PATTERN.sub("", content)

        tableList = []
        for tableMatch in self._TABLE_PATTERN.finditer(content):
            # NOTE: this lookup used to be written as
            #     sreMatch.groupdict(["allFieldContent"]
            # with one closing bracket missing.  A lost bracket/quote makes
            # the interpreter report a confusing error, often on a later
            # line.  When that happens, suspect an unbalanced pair and
            # bisect by commenting out code until the broken block is found.
            allFieldContent = tableMatch.group("allFieldContent")
            fieldList = [
                (
                    fieldMatch.group("colName"),
                    fieldMatch.group("colType"),
                    fieldMatch.group("otherStr"),
                )
                for fieldMatch in self._FIELD_PATTERN.finditer(allFieldContent)
            ]
            tableList.append((tableMatch.group("tableName"), fieldList))
        return tableList

    def ReadSqlFile(self, filename, encoding):
        """Read a SQL file and print every recognised column definition.

        For each column of each ``CREATE TABLE IF NOT EXISTS`` statement the
        column name, column type and the remainder of the line are printed,
        one value per line.  Nothing is printed when the file contains no
        matching table or column.

        Args:
            filename: Path of the SQL file to read.
            encoding: Text encoding of the file, e.g. ``'utf8'``.

        Returns:
            None.
        """
        with codecs.open(filename=filename, mode='r', encoding=encoding) as f:
            content = f.read()

        for _tableName, fieldList in self._ParseTables(content):
            for colName, colType, otherStr in fieldList:
                print(colName)
                print(colType)
                print(otherStr)
        return None


if __name__ == "__main__":
    # Sample of the file layout the parser expects.  It is not used by the
    # script itself, which reads the SQL from fileName below.
    sqlStr = """
    DROP TABLE IF EXISTS nnnnnn_etf_list;
    CREATE TABLE IF NOT EXISTS nnnnnn_etf_list(
    date INTEGER NOT NULL,--日期
    code VARCHAR(32) NOT NULL,--code
    field1 VARCHAR(64) NOT NULL,--字段1
    field2 VARCHAR(64) NULL,--字段2
    field3 VARCHAR(64) NULL,--字段3
    field4 VARCHAR(64) NULL,--字段4
    field5 VARCHAR(64) NULL,--字段5
    field6 VARCHAR(64) NULL,--字段6
    PRIMARY KEY(date,code,field1)
    );"""
    obj = TestCls()
    fileName = r"D:\sql.sql"
    obj.ReadSqlFile(fileName, 'utf8')
    sys.exit(0)

# End.