以下是本人尝试用excel宏来检查epidata中的重复字段。
Sub check()
Dim fs, f As Object
Dim File_Name, Line_Text, Field_Name, x As String
Dim Line_Len, Pos_Start, Pos_Cur, Field_Num, Field_Err, n As
Integer
'定义变量。其中File_Name为要检查的文件路径,fs和f为文件对象
'Line_Text为逐行读取的字符串, Line_Len为字符串的长度
'Pos_Start为当前行扫描的开始位置, Pos_Cur为当前扫描到的位置
'Field_Name为扫描到的字段名称, Field_Num为扫描到的字段要填入表格的行位置, Field_Err为扫描到的重复字段
计数
'x为逐个读取字段的字符,n为临时变量
With Application.FileDialog(msoFileDialogOpen) '打开文件对话
框
.AllowMultiSelect = False
.Filters.Clear
.Filters.Add "调查表文件", "*.qes; *.txt", 1
If .Show = True Then
File_Name = .SelectedItems(1)
Else
MsgBox "打开文件错误,程序运行中止"
Exit Sub
End If
End With
Set fs = CreateObject("Scripting.FileSystemObject") '创建文件对象
Set f = fs.OpenTextFile(File_Name, 1, TristateFalse)
Cells.Select '全选当前表格
Cells.MergeCells = False '取消合并单元格
Cells.HorizontalAlignment = xlLeft '单元格左对齐
Selection.ClearContents '清空表格内容
Selection.Interior.ColorIndex = xlNone '清空表格的背景
色
Rows(1).Font.Bold = True '第一行添加列名并
加粗
Cells(1, 1) = "字段名称"
Cells(1, 2) = "字段所在行"
Cells(1, 3) = "字段所在列"
Cells(1, 4) = "被重复的字段名称"
Cells(1, 5) = "被重复的字段所在行"
Cells(1, 6) = "被重复的字段所在列"
Field_Num = 2 '从第二行开始逐行添
加扫描到的字段名称
Field_Err = 0 '有重复字段的计数变
量初始化置空
Do While f.AtEndOfStream <> True '扫描文件直到文件末
尾
Pos_Start = 1 '扫描开始位置变量初始化
Field_Name = "" '扫描到的字段名称变量初始
化
Line_Text = f.readline '逐行读取文件
Line_Len = Len(Line_Text) '行字符串的长度
Do While Pos_Cur < Line_Len '扫描行直到行结束
Pos_Cur = InStr(Pos_Start, Line_Text, "{") '查找qes文件中字段的开始
标志字符'{'
If Pos_Cur <> 0 Then '如果存在字符'{',则开始
读取字段名称,否则读取下一行继续扫描
Pos_Cur = Pos_Cur + 1 '从'{'后一个位置开始取字
段名称
x = Mid(Line_Text, Pos_Cur, 1) '读取字段名称的首个字符
Do While x <> "}" And x <> "" And x <> "{" '循环取
字符,直到遇到字段结束标志'}'或者行结束
Field_Name = Field_Name & x '将读取到的字符添加到字段名
称变量中
Pos_Cur = Pos_Cur + 1 '指针后移一位
x = Mid(Line_Text, Pos_Cur, 1) '读取下一个字符
Loop
Cells(Field_Num, 1) = Field_Name '将字段名以及字段在文件中所
在行和列依次填入excel表中
Cells(Field_Num, 2) = f.Line - 1
Cells(Field_Num, 3) = Pos_Cur
For n = 2 To (Field_Num - 1) '检查是否与之前扫描到的字段
重复
If Field_Name = Cells(n, 1) Then Exit For
Next n
If n <= Field_Num - 1 Then '如果重复,则在excel右
方单元格填入被重复的字段位置,并且用不同的背景色标识
Cells(Field_Num, 4) = Cells(n, 1)
Cells(Field_Num, 5) = Cells(n, 2)
Cells(Field_Num, 6) = Cells(n, 3)
Range(Cells(Field_Num, 1), Cells(Field_Num,
3)).Interior.Color = RGB(250, 250, 120)
Range(Cells(Field_Num, 4), Cells(Field_Num,
6)).Interior.Color = RGB(132, 181, 234)
Range(Cells(n, 1), Cells(n, 3)).Interior.Color =
RGB(132, 181, 234)
Field_Err = Field_Err + 1 '重复字段计数变量加1
End If
Field_Num = Field_Num + 1 '字段计数变量加1
Field_Name = "" '字段变量置空
Pos_Start = Pos_Cur '对当前行继续搜索
Else
Exit Do
End If
Loop
Loop
Range(Cells(2, 2), Cells(Field_Num - 1, 3)).HorizontalAlignment =
xlRight
Range(Cells(2, 5), Cells(Field_Num - 1, 6)).HorizontalAlignment =
xlRight
Range(Cells(Field_Num, 1), Cells(Field_Num, 6)).Select
Selection.Merge
Range(Cells(Field_Num, 1), Cells(Field_Num, 6)).Select
ActiveCell.FormulaR1C1 = "所检查的调查表文件为 " & File_Name
Range(Cells(Field_Num + 1, 1), Cells(Field_Num + 1, 6)).Select
Selection.Merge
Range(Cells(Field_Num + 1, 1), Cells(Field_Num + 1, 6)).Select
ActiveCell.FormulaR1C1 = "共有 " & Field_Num - 2 & " 个字段"
Range(Cells(Field_Num + 2, 1), Cells(Field_Num + 2, 6)).Select
Selection.Merge
Range(Cells(Field_Num + 2, 1), Cells(Field_Num + 2, 6)).Select
ActiveCell.FormulaR1C1 = "其中有 " & Field_Err & " 个字段重复"
f.Close
End Sub