example for parsing t-sql to get relationships between the tables

965 views
Skip to first unread message

alex;

unread,
Jan 5, 2018, 11:02:19 PM1/5/18
to antlr-discussion
using System;
using System.IO;
using Antlr4.Runtime;
using Antlr4.Runtime.Misc;

namespace tsql1
{
    class Program
    {
        static void Main(string[] args)
        {
            //try
            //{

            string text = System.IO.File.ReadAllText(@"c:\dev\antlr4\grammars-v4-master\tsql\examples\dml_select.sql");
            StringReader reader = new StringReader(text);
            // В качестве входного потока символов устанавливаем ...
            AntlrInputStream input = new AntlrInputStream(reader);
            // Настраиваем лексер на этот поток
            tsqlLexer lexer = new tsqlLexer(input);
            // Создаем поток токенов на основе лексера
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            // Создаем парсер
            tsqlParser parser = new tsqlParser(tokens);
            // Specify our entry point
            //tsqlParser.Query_specificationContext  
            tsqlParser.Tsql_fileContext Tsql_fileContext1 = parser.tsql_file();
            Console.WriteLine("Tsql_fileContext1.ChildCount = " + Tsql_fileContext1.ChildCount.ToString());

            // Walk it and attach our listener
            Antlr4.Runtime.Tree.ParseTreeWalker walker = new Antlr4.Runtime.Tree.ParseTreeWalker();
            AntlrTsqListener listener = new AntlrTsqListener();
            walker.Walk(listener, Tsql_fileContext1);
             //}
             //    catch (Exception e)
             //{
             //    Console.WriteLine(e.Message);
             //}
             Console.ReadKey();
        }

    }

    public class AntlrTsqListener : tsqlBaseListener
    {
        private enum JoinMode {
            Undefined,
            Where,
            Join
            };
        private JoinMode mode;
        private enum BranchType
        {
            Select,
            Table_sources,
            Search_condition
            //Join
        };
        private BranchType branch;

        private string alias = "";

        public override void EnterQuery_specification(tsqlParser.Query_specificationContext ctx)
        {
            mode = JoinMode.Undefined;
        }
        public override void EnterTable_sources(tsqlParser.Table_sourcesContext ctx)
        {
            if (ctx.ChildCount > 1)
                mode = JoinMode.Where;
            branch = BranchType.Table_sources;
        }
        public override void EnterTable_source_item_joined([NotNull] tsqlParser.Table_source_item_joinedContext ctx)
        {
            if ((mode == JoinMode.Undefined  & ctx.ChildCount == 1) || (mode == JoinMode.Where))
                return;
            mode = JoinMode.Join;
            branch = BranchType.Table_sources;
        }
        public override void EnterTable_name_with_hint([NotNull] tsqlParser.Table_name_with_hintContext ctx)
        {
            if (mode == JoinMode.Undefined)
                return;
            if (branch == BranchType.Table_sources)
                Console.WriteLine(branch.ToString());
            alias = "";
        }
        public override void EnterTable_name([NotNull] tsqlParser.Table_nameContext ctx)
        {
            if (branch == BranchType.Search_condition || branch == BranchType.Select || mode == JoinMode.Undefined)
                return;
            Console.WriteLine(ctx.GetText());
        }
        public override void EnterTable_alias([NotNull] tsqlParser.Table_aliasContext ctx)
        {
            if (branch == BranchType.Search_condition || branch == BranchType.Select | mode == JoinMode.Undefined)
                return;
            alias = ctx.GetChild(0).GetText();
            Console.WriteLine("alias=" + alias);
        }
        public override void EnterSearch_condition([NotNull] tsqlParser.Search_conditionContext ctx)
        {
            if (mode == JoinMode.Undefined)
                return;
            branch = BranchType.Search_condition;
            Console.WriteLine("Search_condition");
            Console.WriteLine(ctx.GetText());
            return;
        }
        public override void EnterSelect_statement([NotNull] tsqlParser.Select_statementContext ctx)
        {
            Console.WriteLine("Select_statement");
            branch = BranchType.Select;
            return;
        }
---
--input
---
select *
from t1, t2
where t1.id = t2.id

SELECT p.*
FROM Production.Product AS p
ORDER BY Name ASC;
GO

select *
from zxc as t1
    inner join qwe t2 on t1.id = t2.id
    inner join asd t3 on t3.id = t2.id
...
---
--output 
---
Tsql_fileContext1.ChildCount = 105
Select_statement
Table_sources
t1
Table_sources
t2
Search_condition
t1.id=t2.id
Select_statement
Select_statement
Table_sources
zxc
alias=t1
Table_sources
qwe
alias=t2
Search_condition
t1.id=t2.id
Table_sources
asd
alias=t3
Search_condition
t3.id=t2.id
...

WBR, alex;
good luck

alex;

unread,
Jan 10, 2018, 5:42:53 PM1/10/18
to antlr-di...@googlegroups.com
using System;
using System.IO;
using Antlr4.Runtime;
using Antlr4.Runtime.Misc;

namespace tsql1
{
   
class Program
   
{
       
static void Main(string[] args)
       
{

           
try
           
{
               
//string text = System.IO.File.ReadAllText(@"c:\dev\antlr4\grammars-v4-master\tsql\examples\dml_select.sql");
               
//StringReader reader = new StringReader(text);
               
//// В качестве входного потока символов устанавливаем ...
               
//AntlrInputStream input = new AntlrInputStream(reader);

               
// Настраиваем лексер на этот поток

               
               
TSqlLexer lexer = new TSqlLexer(new CaseChangingCharStream(new AntlrFileStream(@"c:\dev\antlr4\grammars-v4-master\tsql\examples\dml_select1.sql"), true));//(input);

               
// Создаем поток токенов на основе лексера
               
CommonTokenStream tokens = new CommonTokenStream(lexer);
               
// Создаем парсер

               
TSqlParser parser = new TSqlParser(tokens);

               
// Specify our entry point

               
//TSqlParser.Query_specificationContext  
               
TSqlParser.Tsql_fileContext Tsql_fileContext1 = parser.tsql_file();

               
Console.WriteLine("Tsql_fileContext1.ChildCount = " + Tsql_fileContext1.ChildCount.ToString());

               
// Walk it and attach our listener
               
Antlr4.Runtime.Tree.ParseTreeWalker walker = new Antlr4.Runtime.Tree.ParseTreeWalker();
               
AntlrTsqListener listener = new AntlrTsqListener();
                walker
.Walk(listener, Tsql_fileContext1);
           
}

           
catch (Exception e)
           
{
               
Console.WriteLine(e.Message);
               
Console.ReadKey();
           
}
             
Console.ReadKey();
       
}

   
}

   
public class AntlrTsqListener : TSqlParserBaseListener

   
{
       
private enum JoinMode {
           
Undefined,
           
Where,
           
Join
           
};
       
private JoinMode mode;
       
private enum BranchType
       
{
           
Select,
           
Table_sources,
           
Search_condition
           
//Join
       
};
       
private BranchType branch;

       
private string alias = "";


       
public override void EnterQuery_expression(TSqlParser.Query_expressionContext ctx)
       
{
            mode
= JoinMode.Undefined;
       
}
       
public override void EnterTable_sources(TSqlParser.Table_sourcesContext ctx)

       
{
           
if (ctx.ChildCount > 1)
                mode
= JoinMode.Where;
            branch
= BranchType.Table_sources;
       
}

       
public override void EnterTable_source_item_joined([NotNull] TSqlParser.Table_source_item_joinedContext ctx)
       
{
           
//int ii;
           
//for (ii = 0; ii < ctx.ChildCount; ++ii)
           
//{
           
//    Console.WriteLine("ii=" + ii.ToString());
           
//    Console.WriteLine(ctx.GetChild(ii).GetType().ToString());
           
//    Console.WriteLine(ctx.GetChild(ii).GetText());
           
//}

           
if ((mode == JoinMode.Undefined  & ctx.ChildCount == 1) || (mode == JoinMode.Where))
               
return;
            mode
= JoinMode.Join;
            branch
= BranchType.Table_sources;
       
}

       
public override void EnterTable_name_with_hint([NotNull] TSqlParser.Table_name_with_hintContext ctx)

       
{
           
if (mode == JoinMode.Undefined)
               
return;
           
if (branch == BranchType.Table_sources)
               
Console.WriteLine(branch.ToString());
           
alias = "";
       
}

       
public override void EnterTable_name([NotNull] TSqlParser.Table_nameContext ctx)

       
{
           
if (branch == BranchType.Search_condition || branch == BranchType.Select || mode == JoinMode.Undefined)
               
return;
           
Console.WriteLine(ctx.GetText());

        }
       
public override void EnterTable_alias([NotNull] TSqlParser.Table_aliasContext ctx)

       
{
           
if (branch == BranchType.Search_condition || branch == BranchType.Select || mode == JoinMode.Undefined)
               
return;
           
alias = ctx.GetChild(0).GetText();
           
Console.WriteLine("alias=" + alias);
       
}

       
public override void EnterSearch_condition([NotNull] TSqlParser.Search_conditionContext ctx)

       
{
           
if (mode == JoinMode.Undefined)
               
return;
            branch
= BranchType.Search_condition;
           
Console.WriteLine("Search_condition");
           
Console.WriteLine(ctx.GetText());
           
return;
       
}

       
public override void EnterSelect_statement([NotNull] TSqlParser.Select_statementContext ctx)

Lukáš Kostka

unread,
Feb 20, 2018, 5:44:42 PM2/20/18
to antlr-discussion
Hi, I need to parse SQL queries and get information about conditions assign them to a specific columns in specific tables and keep it in some structured way. Could you give me some example?  

Dne sobota 6. ledna 2018 5:02:19 UTC+1 alex; napsal(a):

alex;

unread,
Feb 21, 2018, 3:09:40 AM2/21/18
to antlr-discussion
Hi, Lukas.

I think, that for your task not need antlr,
because it is easier to use regular expressions.
In many languages exists libraries for RegEx.

WBR, alex;

среда, 21 февраля 2018 г., 1:44:42 UTC+3 пользователь Lukáš Kostka написал:

Lukáš Kostka

unread,
Feb 21, 2018, 1:26:15 PM2/21/18
to antlr-discussion
Hi, Alex. 

Thanks for your answer. 
I understand that there are easier ways to complete this task, but for my assignment I need to use antlr 4 parser. I would really appreciated some kind of advice.
Much thanks
Dne středa 21. února 2018 9:09:40 UTC+1 alex; napsal(a):
Hi, Lukas.
y

alex;

unread,
Feb 21, 2018, 2:00:47 PM2/21/18
to antlr-discussion
Hi, Lukas.

tsql-grammar
https://github.com/antlr/grammars-v4/blob/master/tsql/TSqlParser.g4
is left-recursive, or simply a hierarchy of expressions.

It have rule "update_elem", start analyze parsing from it.
this rule have parents and children.

Then use method EnterUpdate_elem() and related method.

My previous example can help you.

I helped you?

WBR, alex;

Lukáš Kostka

unread,
Feb 21, 2018, 2:32:11 PM2/21/18
to antlr-discussion
Thank you very much for your advice, I belive it will help me.
I'll try this and let you know. Once again thanks.

Dne středa 21. února 2018 20:00:47 UTC+1 alex; napsal(a):

Lukáš Kostka

unread,
Feb 25, 2018, 1:22:08 PM2/25/18
to antlr-discussion
Hi, I'm testing it and when I insert some select statement it counts whole statement as one child and only if it is in capital letters, in small letters i think it cannot consume and takes every letter as a child. Even in your exemple i have this problem. I'm using same grammar as you. I attached a screen with this.

Dne středa 21. února 2018 20:32:11 UTC+1 Lukáš Kostka napsal(a):
chyba.png

alex;

unread,
Feb 25, 2018, 2:02:01 PM2/25/18
to antlr-discussion
Hi,
my first example for old grammar, that is caseinsensitive.

New grammar is  case sensitive. See example 2.

WBR, alex;

воскресенье, 25 февраля 2018 г., 21:22:08 UTC+3 пользователь Lukáš Kostka написал:

Lukáš Kostka

unread,
Feb 25, 2018, 2:33:57 PM2/25/18
to antlr-discussion
Thank you. Now it works. There's another think I don't understand, you said I should use EnterUpdate_elem. Isn't it for Update statemes? I need to parse Select statements. 

Dne neděle 25. února 2018 20:02:01 UTC+1 alex; napsal(a):

alex;

unread,
Feb 25, 2018, 3:39:13 PM2/25/18
to antlr-discussion
so I misunderstood you.

see  rules select_list, select_list_elem, and other related

Look at gramar deeply.

WBR, alex;

воскресенье, 25 февраля 2018 г., 22:33:57 UTC+3 пользователь Lukáš Kostka написал:

Win Pooh

unread,
Sep 6, 2019, 5:30:55 AM9/6/19
to antlr-discussion
I am trying to use the code as an example to check how it works. But have some problems.
I use batch
java org.antlr.v4.Tool -visitor -Dlanguage=CSharp "%1\TSqlLexer.g4" -o "%1\TSQLLexer"
java org.antlr.v4.Tool -visitor -Dlanguage=CSharp "%1\TSqlParser.g4" -o "%1\TSQLParser"

I have compiled the TSqlLexer.g4 ok, but when I  compile the TSqlParser.g4 I receive the errors like these:

error(126): TSqlParser.g4:4026:58: cannot create implicit token for string literal in non-combined grammar: '='
error(126): TSqlParser.g4:4026:64: cannot create implicit token for string literal in non-combined grammar: '!'
error(126): TSqlParser.g4:4026:68: cannot create implicit token for string literal in non-combined grammar: '>'
error(126): TSqlParser.g4:4026:74: cannot create implicit token for string literal in non-combined grammar: '!'
error(126): TSqlParser.g4:4026:78: cannot create implicit token for string literal in non-combined grammar: '<'
error(126): TSqlParser.g4:4030:6: cannot create implicit token for string literal in non-combined grammar: '+='
error(126): TSqlParser.g4:4030:13: cannot create implicit token for string literal in non-combined grammar: '-='
error(126): TSqlParser.g4:4030:20: cannot create implicit token for string literal in non-combined grammar: '*='
error(126): TSqlParser.g4:4030:27: cannot create implicit token for string literal in non-combined grammar: '/='
error(126): TSqlParser.g4:4030:34: cannot create implicit token for string literal in non-combined grammar: '%='
error(126): TSqlParser.g4:4030:41: cannot create implicit token for string literal in non-combined grammar: '&='
error(126): TSqlParser.g4:4030:48: cannot create implicit token for string literal in non-combined grammar: '^='
error(126): TSqlParser.g4:4030:55: cannot create implicit token for string literal in non-combined grammar: '|='
error(126): TSqlParser.g4:4034:35: cannot create implicit token for string literal in non-combined grammar: '%'
...


What is wrong? 

Thnx.


среда, 10 января 2018 г., 23:42:53 UTC+1 пользователь alex; написал:
using System;
using System.IO;
using Antlr4.Runtime;
using Antlr4.Runtime.Misc;

...

Mike Lischke

unread,
Sep 6, 2019, 8:00:54 AM9/6/19
to antlr-discussion

I have compiled the TSqlLexer.g4 ok, but when I  compile the TSqlParser.g4 I receive the errors like these:

error(126): TSqlParser.g4:4026:58: cannot create implicit token for string literal in non-combined grammar: '='

What is wrong? 

That means in the combined grammar are lexer tokens implicitly defined (via the 'abc' syntax). That is not allowed. In general I always advice people to define lexer tokens explicitly. That also enables the parser user to access them by self speaking names, instead of generated ones.


Win Pooh

unread,
Sep 6, 2019, 8:31:16 AM9/6/19
to antlr-discussion
I see. There are 2 separate files on the antlr4  github. 



пятница, 6 сентября 2019 г., 14:00:54 UTC+2 пользователь Mike Lischke написал:

Win Pooh

unread,
Sep 6, 2019, 9:31:36 AM9/6/19
to antlr-discussion
Again about alex's sample code . Is there really & but not && ?

           if ((mode == JoinMode.Undefined  & ctx.ChildCount == 1) || (mode == JoinMode.Where))

среда, 10 января 2018 г., 23:42:53 UTC+1 пользователь alex; написал:
Reply all
Reply to author
Forward
0 new messages