How to get Accurate Glyph Coordinates

133 views
Skip to first unread message

Balraj Balakrishnan

unread,
Nov 6, 2014, 7:41:28 AM11/6/14
to pdfne...@googlegroups.com

 

Dear Team,
 
Can you please help me to get the accurate position of the character. I've written the below code based on the given solution but few areas character position is not correct.
 
Private Function ProcessElements(ByVal PDFPage As Page) As String
Try
Using reader As ElementReader = New ElementReader
reader.Begin(PDFPage)
Dim element As Element = reader.Next()
While (Not IsNothing(element)) ' Read page contentsn
Dim type As Element.Type = element.GetType()
If type = element.Type.e_text Then
Dim txt As String = element.GetTextString
Dim gs As GState = element.GetGState()
Dim font As pdftron.PDF.Font = gs.GetFont()
Dim font_size As Double = gs.GetFontSize()
Dim horiz_spacing As Double = gs.GetHorizontalScale() / 100.0
Dim text_mtx As Matrix2D = element.GetTextMatrix()
Dim font_mtx As Matrix2D = New Matrix2D(font_size * horiz_spacing, 0, 0, font_size, 0, 0)
Dim units_per_em As Double = font.GetUnitsPerEm
font_mtx = font_mtx * New Matrix2D(1.0 / units_per_em, 0, 0, -1.0 / units_per_em, 0, 0)
Dim bbox1 As Rect = font.GetBBox()
Dim bbox_height As Double = bbox1.y2 - bbox1.y1
Dim decent As Double = (font_size * bbox1.y1) / bbox_height
Dim ascent As Double = (font_size * bbox1.y2) / bbox_height
Dim ctm As Matrix2D = element.GetCTM()
Dim mtx As Matrix2D = New Matrix2D
mtx.Set(ctm)
mtx.Concat(text_mtx.m_a, text_mtx.m_b, text_mtx.m_c, text_mtx.m_d, text_mtx.m_h, text_mtx.m_v)
Dim font_sz_scale_factor As Double = System.Math.Sqrt(mtx.m_b * mtx.m_b + mtx.m_d * mtx.m_d)
font_size = font_sz_scale_factor * font_size
Dim itr As CharIterator = element.GetCharIterator()
Dim inc As Integer = 0
Dim firstchardata As CharData = Nothing
Dim lastchardata As CharData = Nothing
While itr.HasNext()
If inc = 0 Then
firstchardata = itr.Current()
lastchardata = itr.Current()
inc += 1
Else
lastchardata = itr.Current()
End If
itr.Next()
End While
Dim pos As New Matrix2D(1, 0, 0, 1, firstchardata.x, firstchardata.y)
Dim pos2 As New Matrix2D(1, 0, 0, 1, lastchardata.x, lastchardata.y)
Dim path_mtx As New Matrix2D(element.GetCTM() * element.GetTextMatrix() * pos * font_mtx)
Dim path_mtx2 As New Matrix2D(element.GetCTM() * element.GetTextMatrix() * pos2 * font_mtx)
Dim pdata As PathData = font.GetGlyphPath(lastchardata.char_code, False)
'Dim path_mtx As New Matrix2D(text_mtx * pos * font_mtx)
For i As Integer = 1 To pdata.points.Length - 1 Step 2
path_mtx.Mult(pdata.points(i - 1), pdata.points(i))
Next i
Dim pbbox As New Rect
Dim res As Boolean = GetPathBBox(pdata.points, pdata.points.Length, pdata.operators, pdata.operators.Length, pbbox.x1, pbbox.y1, pbbox.x2, pbbox.y2)
''processing pbbox
End If
element = reader.Next()
End While
reader.End()
End Using
Catch ex As Exception
End Try
End Function
 
Public Function GetPathBBox(ByVal data() As Double, ByVal data_sz As Integer, ByVal opr() As Byte, ByVal opr_szAs Integer, ByRef min_x As Double, ByRef min_y As Double, ByRef max_x As Double, ByRef max_y As Double) AsBoolean
        Dim opr_itr As Integer = 0, opr_end As Integer = opr_sz
        Dim data_itr As Integer = 0, data_end As Integer = data_sz
        Dim x, y As Double
 
 
        Const invalid_value As Double = 1.0E+300
        max_y = -invalid_value
        max_x = max_y
        min_y = invalid_value
        min_x = min_y
        Do While opr_itr < opr_end
 
 
            Select Case CType(CInt(opr(opr_itr)), PathSegmentType)
                Case PathSegmentType.e_moveto
                   If Not (data_itr + 2 <= data_end) Then
                        Return False
                    End If
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
                Case PathSegmentType.e_lineto
                    If Not (data_itr + 2 <= data_end) Then
                        Return False
                    End If
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                Case PathSegmentType.e_rect
                    If Not (data_itr + 4 <= data_end) Then
                        Return False
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                    x = x + data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = y + data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                Case PathSegmentType.e_cubicto
                    If Not (data_itr + 6 <= data_end) Then
                        Return False
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                   If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                Case PathSegmentType.e_conicto
                    If Not (data_itr + 4 <= data_end) Then
                        Return False
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
                    x = data(data_itr)
                    data_itr += 1
                    If x < min_x Then
                        min_x = x
                    End If
                    If x > max_x Then
                        max_x = x
                    End If
 
                    y = data(data_itr)
                    data_itr += 1
                    If y < min_y Then
                        min_y = y
                    End If
                    If y > max_y Then
                        max_y = y
                    End If
 
            End Select
            opr_itr += 1
        Loop
 
        Return (min_x <> invalid_value AndAlso min_y <> invalid_value)
    End Function

 
query.png
sample.pdf

Aaron

unread,
Nov 7, 2014, 8:37:47 PM11/7/14
to pdfne...@googlegroups.com
Hello Balraj,

Thank you for posting this code and sample document.  Unfortunately, it's a little unclear what is about the coordinates found by your ProcessElements.  What exactly are you seeing, and how would you like for it to work?

Thanks in advance for the clarification.
Reply all
Reply to author
Forward
0 new messages