forked from softpano/pythonizer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpythonizer
More file actions
1688 lines (1640 loc) · 65.6 KB
/
pythonizer
File metadata and controls
1688 lines (1640 loc) · 65.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/perl
## pythonizer -- Translator of the subset of Perl 5 to Python 3.x
## Copyright Nikolai Bezroukov, 2019-2021.
## Licensed under Perl Artistic license
##
## As most Perl statement are simple over 80% of them usually allow sucessful translation. That's why we use the term "fuzzy" translation.
## The result will contain some statements that need to be converted by hand or corrected. In some cases that requres change of logic.
## Best works for Perl 4 subset of Perl 5 which typically is used in sysadmin scripts.
## Perl scripts that extensivly use references or OO requre more extensive manual effort
##
## --- INVOCATION:
##
## pythonizer [options] [file_to_process]
##
##--- OPTIONS:
## -p -- version of Python for generation, Default 3, if set to 2 generation is into Python 2.7
## -v -- verbosity 0 -minimal (only serious messages) 3 max verbosity (warning, errors and serious); default -v 3
## -h -- this help
## -t -- size of tab ingenerated Python code (emulated with spaces). Default is 4
## -d level of debugging default is 0 -- production mode
## 0 -- Production mode
## 1 -- Testing mode. Program is autosaved in Archive (primitive versioning mechanism)
## 2 -- Stop at the beginning of statement analysys (the statement can be selected via breakpoint option -b )
## 3 -- More debugging output.
## 4 -- Stop at lexical scanner with $DB::single = 1;
## 5 -- output stages of Python line generation
##--- PARAMETERS:
##
## 1st -- name of file (only one argument accepted)
#--- Development History
#
# Ver Date Who Modification
# ===== ========== ======== ==============================================================
# 0.010 2019/10/09 BEZROUN Initial implementation
# 0.020 2019/10/10 BEZROUN Revised structure of global arrays, Now we have four parallel arrays: TokenStr, ValClass ValPerl, ValPy
# 0.030 2019/10/11 BEZROUN Recursion is used to expressions, but in certain cases when I need a look-ahead, bracket counting is used instead
# 0.040 2019/10/12 BEZROUN Better listing for debugging implemented
# 0.050 2019/11/06 BEZROUN Forgot almost everything after a month; revised code just to refreash memory. Tokenizer slightly improved
# 0.051 2019/11/07 BEZROUN Assignment within logical expression is not allowed in Python 2.7. It is now translated correctly
# 0.060 2019/11/08 BEZROUN post assignment conditions like "next if( substr($line,0,1) eq '') " are processed correctly
# 0.070 2019/11/11 BEZROUN x=(v>0) ? y :z is now translated into ugly Python ternary operator which exists since Python 2.5
# 0.071 2019/11/11 BEZROUN program now correctly translated 80% codelines of pre_pythonizer.pl
# 0.080 2019/12/27 BEZROUN Array ValCom is introduced for the preparation of version 0.2 of pre-processor pre_pythonizer.pl
# 0.090 2020/02/03 BEZROUN #\ means continuation of the statement.
# 0.091 2020/02/03 BEZROUN Moved sub preprocess_line to Pythonizer
# 0.100 2020/03/16 BEZROUN Reworked scanner
# 0.200 2020/08/05 BEZROUN Abandoned hope to make it perfect.
# 0.210 2020/08/07 BEZROUN Moved gen_output to Perlscan, removed ValCom from the exported list.
# 0.220 2020/08/07 BEZROUN Diamond operator is processed as a special type of identifier.
# 0.230 2020/08/09 BEZROUN gen_chunk moves to Perlscan module. Pythoncode array made local
# 0.230 2020/08/09 BEZROUN more functions and statements implemented
# 0.240 2020/08/10 BEZROUN postfix conditional like return if(rc>0) re-implemented differently via scanner buffer
# 0.250 2020/08/10 BEZROUN split function is reimplemented and optimized in case there is plain vanilla string and not-regex.
# 0.251 2020/08/12 BEZROUN Perl_default_var is renames into default_var
# 0.260 2020/08/14 BEZROUN System variables in double quoted literals are now complied correctly. Perlscan.pm improved.
# 0.261 2020/08/14 BEZROUN for loop translation corrected
# 0.270 2020/08/15 BEZROUN getopts is now implemented in Softpano.pm to allow the repetition of option letter to set the value of options ( -ddd)
# 0.300 2020/08/17 BEZROUN Python 3.8 now is default for generaion. Option -p introduced. -p 2 changes target version of Python to 2.7
# 0.310 2020/08/18 BEZROUN f-strings are implemented for Python 3 mode instead of decompiling string into chunks
# 0.320 2020/08/20 BEZROUN open statement and (condition) && ... statement translation corrected
# 0.400 2020/08/22 BEZROUN __DATA__ and POD statements are now processed. File filename.data is created for data file.
# 0.410 2020/08/24 BEZROUN pre_pythonizer now refactors Perl script pushing subroutines to the top and creating main sub.
# 0.420 2020/08/25 BEZROUN print recognized in constructs like if($debug){ print 'something';}.
# 0.430 2020/08/25 BEZROUN Variables from other namespaces recognized.
# 0.440 2020/08/26 BEZROUN FailedTrans flag is replaced with TrStatus flag. Failure now is determined by the negative value of the TrStatus flag.
# 0.450 2020/08/26 BEZROUN Option - r (refactor) added
# 0.500 2020/08/31 BEZROUN Regular expression processing competly reworked based on changed in Perlscan
# 0.510 2020/08/31 BEZROUN Special subroutine for putting regex in quote created in Perlscan.pm
# 0.520 2020/08/31 BEZROUN Statement $line=~/abc/ this is not assignment statement; In no metacharaters it should be treated as string search.
# 0.530 2020/08/31 BEZROUN Handling of __DATA and __END__ improved. Now they are not discarded but instead the separate file with extention.data is created.
# 0.540 2020/09/01 BEZROUN Translation of function substr improved by recognizing several special cases.
# 0.550 2020/09/01 BEZROUN Matching of groups corrected.
# 0.560 2020/09/02 BEZROUN Translation of for and while improved.
# 0.570 2020/09/03 BEZROUN Translation of ++ and -- implemented
# 0.580 2020/09/03 BEZROUN Translation of function sprintf implemented
# 0.600 2020/09/08 BEZROUN List on internal functions created. Translation of backquotes and open improved.
# 0.700 2020/09/17 BEZROUN Basic global varibles detection added. Global statement generated for each local subroutine
# 0.800 2020/10/02 BEZROUN More correct translation of array assignments. Globals initialiazed after main sub. Installer added
# 0.810 2020/10/05 BEZROUN Pre-pythonizer by default does not create main subroutine
# 0.820 2020/10/06 BEZROUN Function parsing rewritten to accomodate some "bracketless" cases which now became a norm in Perl
# 0.830 2020/10/08 BEZROUN Implementation of postfix conditional is completly rewritten and now uses token buffering
# 0.840 2020/10/09 BEZROUN state varibles now are prefixed with the name of sub to avoid conflict with globals
# 0.850 2020/10/12 BEZROUN print translation improved; many fixes in lex analyser
# 0.860 2020/10/14 BEZROUN Python 2.7 mode eliminated to simplify the code. Option -p removed. "since" test passed
# 0.870 2020/10/21 BEZROUN Treatment of brackets systematised. The code of subroutine expression revised.
# 0.871 2021/05/10 BEZROUN Minor corrections in the header.
#!start ===============================================================================================================================
use v5.10.1;
use warnings;
use strict 'subs';
use feature 'state';
BEGIN {
use File::Spec::Functions qw(rel2abs);
use File::Basename qw(dirname);
my $path = rel2abs( $0 );
our $myDir = dirname( $path );
push @INC,$myDir;
}
#
# Modules used ( from the current directory to make debugging more convenient; will change later)
#
use Softpano qw(autocommit abend banner logme summary out);
use Perlscan ('gen_statement', 'tokenize', 'gen_chunk', 'append', 'replace',
'insert', 'destroy', 'autoincrement_fix', '@ValClass', '@ValPerl', '@ValPy', '@ValCom',
'@ValType', '$TokenStr');
use Pythonizer qw(correct_nest getline prolog output_line @LocalSub %GlobalVar);
$VERSION='0.871';
$SCRIPT_NAME='pythonizer';
#
# options
#
$breakpoint=9999; # line from which to debug code. See Pythonizer user guide
$debug=0; # 0 -- production mode
# 1 -- testing mode
# 2 -- first pass debugging
# 3 -- provides tracing during the second pass (useful for users for trableshooing infinite loops)
# 4 -- stop at Perlscan.pm
# 5 -- stop at particular error message.
$HOME=$ENV{'HOME'}; # the directory used for autobackup (only if debug>0)
if( $^O eq 'cygwin' ){
# $^O is built-in Perl Variable that contains OS name
$HOME="/cygdrive/f/_Scripts"; # CygWin development mode -- the directory used for backups
}
#
# Local dictionaries
#
%PyOpen=('<'=>'r', '>'=>'w', '>>'=>'a', '+<'=>'+');
$LOG_DIR='/tmp/'.ucfirst($SCRIPT_NAME);
banner($LOG_DIR,$SCRIPT_NAME,"Fuzzy translator of Python to Perl. Version $VERSION",30); # Opens SYSLOG and print STDERRs banner; parameter 4 is log retention period
prolog(); # sets all options, including breakpoint
if( $debug > 0 ){
autocommit("$HOME/Archive",$ENV{'PERL5LIB'},qw(Softpano.pm Perlscan.pm Pythonizer.pm));
}
#
# Skip initial block of comments
#
$TrStatus=0;
chomp($line=<>); # we need to discard the first line with /usr/bin/perl as interpreter
output_line('','#!/usr/bin/python3 -u'); # put a proper line
if ($line =~ /^\s*#!/){
$line=getline(); # skip previous interpreter definition and get the first meaningful line + initial block of comments, if present
}else{
getline($line); # put the first line in the readline buffer
$line=getline(); # rescan it to have full proper processing
}
foreach $l ('import sys,os,re','import fileinput,subprocess,inspect'){
output_line('',$l); # to block reproducing the first source line
}
#while($l=<DATA>){
# chomp $l;
# output_line($l,'',''); # to block reproducing the first source line; added Sept 3, 2020 just for the future
#}
#close DATA;
#
#Main loop
#
@Perlscan::BufferValClass=@Perlscan::BufferValCom=@Perlscan::BufferValPerl=@Perlscan::BufferValPy=(); # cleaning after the first pass
my ($start,$token_buffer_active);
$CurSub='main';
$token_buffer_active=0;
$we_are_in_sub_body=0;
while( defined($line) || scalar(@Perlscan::BufferValClass)>0 ){
$TrStatus=0;
if( scalar(@ValClass)==0 || ! defined($ValClass[0]) ){
$line=getline(); # skip lines with no tokens like ';'
next;
}
#
# You need to claw back tokens from buffer for postfix conditionals. This is a pretty brittle and complex code -- Oct 8,2020 NNB
#
if ( scalar(@Perlscan::BufferValClass)==0 ) {
if( $debug>1 ){
say STDERR "\n\n === Line $. Perl source:".(defined($line)?$line:$ValPerl[0])."===\n";
if( $.>=$breakpoint ){
logme('S', "Breakpoint was triggered at line $. in pythonizer.pl");
# $breakpoint=999999;
$DB::single = 1;
}
}
tokenize($line); # I just like to see tokenize call first in debugger :-)
}else{
if($token_buffer_active==0){
@ValClass=@ValPerl=('{');
$token_buffer_active=1;
}elsif($token_buffer_active==1){
@ValClass=@Perlscan::BufferValClass;
$TokenStr=join('',@ValClass);
@ValCom=@Perlscan::BufferValCom;
@ValPerl=@Perlscan::BufferValPerl;
@ValPy=@Perlscan::BufferValPy;
$token_buffer_active=2;
}else{
@ValClass=@ValPerl=('}');
@Perlscan::BufferValClass=@Perlscan::BufferValCom=@Perlscan::BufferValPerl=@Perlscan::BufferValPy=();
$token_buffer_active=0;
}
}
#
# Dealing with problem of state varaible mapping into Python via renaming
#
rename_state_var(0,$#ValPy);
if( index($TokenStr,'s^')>-1){
# Selected cases of postfix and prefix operator can be translated; other not
autoincrement_fix() # exported function located in Perlscan
}
#
# Statements
#
$RecursionLevel=0;
if( $ValClass[0] eq '}' ){
# we treat curvy bracket as a separate dummy statement
correct_nest(-1); # next line will be de-indented
if( $we_are_in_sub_body && $Pythonizer::NextNest ==0 ){
correct_nest(0,0);
initialize_globals_for_state_vars();
%new_state_var_name=(); # hash for own and state variables
%new_state_var_init=();
$CurSub='main';
}
}elsif( $ValClass[0] eq '{' ){
correct_nest(1); # next line will be indented
}elsif( $ValClass[0] eq '(' ){
$close_br_pos=matching_br(0);
if( $close_br_pos && $ValClass[$close_br_pos+1] eq '=' ){
$TrStatus=assignment(0);
}else{
$TrStatus=-255;
}
}elsif( $ValPy[0] eq 'NoTrans!' ){
output_line('','#SKIPPED: '.$line);
$line=getline();
next;
}elsif( $ValPerl[0] eq 'sub' ){
$we_are_in_sub_body=1;
%new_state_var_name=(); # hash for own and state variables
%new_state_var_init=();
# Perl has two types of sub statements -- prototype and actual
$CurSub=$ValPy[1];
correct_nest(0,0);
gen_chunk('def',$CurSub,'(perl_arg_array):'); # def name ([list of arguments])
$LocalSub{$CurSub}=1;
if (exists($GlobalVar{$CurSub}) ){
gen_statement();
correct_nest(1,1);
output_line($GlobalVar{$CurSub});
correct_nest(0,0);
}
}elsif( $ValPerl[0] eq 'BEGIN' ){
correct_nest(0,0);
gen_chunk($ValPy[0],'perl_begin:'); # def
}elsif( $ValPerl[0] eq 'close' ){
for( my $i=1; $i<@ValPy; $i++ ){
if( $ValClass[$i] eq 'i' ){
gen_chunk($ValPy[$i].'.f.close;');
}
}
}elsif( $ValPerl[0] =~ /say|print/ ){
$TrStatus=print3(0);
}elsif( $ValPerl[0] =~ /warn/ ){
$TrStatus=print3(0,'STDERR'); # in Python3 this is a function
}elsif( $ValClass[0] eq 's' ){
if( ($TokenStr=~tr/=//) > 1 && $ValClass[-1] eq 'd' && ($ValPerl[-2] eq '+=' || $ValPerl[-2] eq '-=') ){
# multiple assignment with the last increment like $K=$i+=1 need to be expanded due to bug in Python parser
$ValPy[-1]=$ValPy[-3].substr($ValPerl[-2],0,1).$ValPy[-1];
$ValPy[-2]=$ValPerl[-2]='=';
}
$TrStatus=assignment(0);
}elsif( $ValClass[0] eq 't' ){
if( scalar(@ValClass)==2 ){
#uninitalise single var declaration like my $line
if( $ValPerl[0] eq 'my' ){
output_line("$ValPy[1]=None");
}elsif( $ValPerl[0] eq 'own' ){
gen_chunk( $ValPy[0], $ValPy[1] );
}elsif( $ValPerl[0] eq 'state' ){
$new_name=$CurSub.'_'.$ValPy[1];
$new_state_var_name{$ValPy[1]}=$new_name;
gen_chunk($ValPy[0],$new_name);
}
finish();
next;
}elsif( scalar(@ValClass)==4 && $ValClass[2] eq '=' ){
if( $ValPerl[0] eq 'my' ){
output_line("$ValPy[1]=$ValPy[-1]");
}elsif( $ValPerl[0] eq 'own' ){
gen_chunk( $ValPy[0], $ValPy[1].'=',$ValPy[-1] );
}elsif( $ValPerl[0] eq 'state' ){
$new_name=$CurSub.'_'.$ValPy[1];
$new_state_var_name{$ValPy[1]}=$new_name;
gen_chunk($ValPy[0],$new_name);
$new_state_var_init{$ValPy[1]}=$ValPy[-1];
}
finish();
next;
}elsif($ValClass[1] eq '('){
#this is a more complex case
my $last=matching_br(1);
if($#ValClass>$last && $ValClass[$last+1] eq '='){
if($ValPerl[0] eq 'state' ){
rename_state_var(2,$last-1);
}
$TrStatus=assignment(1);
}else{
for($i=2; $i<$last;$i++){
if ($ValPy[$i] eq ','){
gen_chunk('=');
}elsif($ValPerl[0] eq 'state'){
$new_name=$CurSub.'_'.$ValPy[$i];
$new_state_var_name{$ValPy[$i]}=$new_name;
gen_chunk($new_name);
}else{
gen_chunk($ValPy[$i]);
}
}
gen_chunk('=None');
}
}elsif( $ValClass[2] eq '=' ){
$TrStatus=assignment(1);
}else{
$TrStatus=-255;
}
}elsif( $ValClass[0] eq 'h' ){
# hash to has need method copy #
if( $ValClass[1] eq '=' ){
if( $ValPerl[2] eq '(' ){
# Special case hash initialization needs to be converted to dictionary initialization
gen_chunk($ValPy[0].'={');
for( my $i=3; $i<$#ValPy; $i++ ){
gen_chunk( $ValPy[$i] );
}
gen_chunk('}');
finish();
next;
}elsif( scalar(@ValClass)==2 && ($ValPerl[2] eq 'h' || $ValPerl[2] eq 'q') ){
gen_chunk("$ValPy[0]=$ValPy[2].copy"); # copy structure not reference
finish();
next;
}
}
$TrStatus=assignment(0);
}elsif($ValClass[0] eq 'a'){
if( $ValClass[1] eq '=' ){
if( $#ValClass==2 && $ValClass[2] eq 'a'){
# Special case array to array copy
gen_chunk("$ValPy[0]=$ValPy[2].copy");
finish();
next;
}elsif( $ValPerl[2] eq '(' ){
# array initialization
gen_chunk($ValPy[0],'=[');
$end_pos=matching_br(2);
for (my $i=3; $i<$end_pos; $i++){
gen_chunk($ValPy[$i]);
}
gen_chunk(']');
finish();
next;
}elsif( $ValPerl[2] =~ /<\w*>/ ){
# Special case of array initialization via slurping
gen_chunk("$ValPy[0]=$ValPy[2].copy");
finish();
next;
}elsif( $ValClass[2] eq 'a' && $ValPerl[3] eq '=' ){
my $last_eq=rindex($TokenStr,'=');
if( $ValPerl[$last_eq+1] eq '(' ){
# list assignment @x=(1,2,3);
$ValPy[$last_eq+1]='[';
if( $ValPerl[-1] eq ')' ){
$ValPy[-1]=']';
}else{
$TrStatus=-255;
finish();
next;
}
}
for( $i=0; $i<$last_eq; $i+=2 ){
# cascade assignent processing @x=@y=@z
if ($ValClass[$i+1] ne '='){
logme('S',"Token $ValPerl[$i+1] was found insted of '=' in what is expected to be array assignment");
$TrStatus=-255;
last;
}
if( $last_eq+1==@ValClass ){
gen_statement("$ValPy[$i]=$ValPy[-1].copy"); # last array is the source
}elsif( $ValPerl[$last_eq+1] eq '(' ){
#left side is the list $a=@b=(1,2,3)
gen_chunk("$ValPy[$i]=");
expression($last_eq+1,$#ValClass); # processing (1,2,3) -- you need brackets here. Recursion level should be 0
gen_statement();
}
}
finish();
next;
}
}
$TrStatus=assignment(0);
}elsif( $ValClass[0] eq 'c' ){
#normal control statement: if/while/for, etc -- next line is always nested.
# in foreach loop "(" is absent ) do in perl you can's distibush between postfix for and foreach loop without parens
if( defined($ValType[0]) && $ValType[0] eq 'P' && $ValClass[1] ne '(' && $ValPy[0] ne 'for' ){
insert(1,'(','(','(');
append(')',')',')');
}
if ($TokenStr=~/^c\(\!f\(?/ && $ValPerl[3] eq 'open' ){
$TrStatus=open_fun(3,'c');
}else{
$TrStatus=control(0); # control now itself destroy the last ) Oct 14, 2020 --NNB
}
}elsif( $ValClass[0] eq 'C' ){
#next last continue
if( $ValPerl[0] eq 'elsif' ){
gen_chunk('elif ');
$end_pos=matching_br(1);
$TrStatus=expression(2,$end_pos-1,0);
gen_chunk(':');
gen_statement();
}elsif( $ValPerl[0] eq 'else' ){
gen_chunk('else:');
gen_statement();
}
}elsif( $ValClass[0] eq 'f' ){
#this is a left hand function like is substr($line,0,1)='' or open or chomp;
if( $ValPerl[0] eq 'substr' ){
$TrStatus=left_hand_substr(0);
}elsif( $ValPerl[0] eq 'chomp' ){
if( $#ValPerl==0) {
gen_chunk(q[default_var=default_var.rstrip("\n")]); # chomp with no argumnets
}else{
function(0,$#ValClass);
}
}elsif( $ValPerl[0] eq 'chop' ){
if( $ValPerl[1] eq '(' ){
if( $ValClass[2] eq 's' ){
gen_chunk($ValPy[2].'='.$ValPy[2].'[0:-1]');
} else{
$TrStatus=-1;
}
}else{
gen_chunk('default_var=default_var[0:-1]');
}
}elsif( $ValPerl[0] eq 'open' ){
$rc=open_fun(0,'s');
}else{
$TrStatus=function(0);
}
}elsif( $ValClass[0] eq 'x' ){
# this is backquotes
gen_chunk(qq{default_var=subprocess.run($ValPy[0],capture_output=True,text=True,shell=True)});
gen_statement();
gen_chunk(qq[subprocess_rc=default_var.returncode]);
}elsif( $ValClass[0] eq 'd' ){
if( length($TokenStr)==1 ){
logme('W','line starts with digit');
}else{
$TrStatus=-1;
}
}elsif( $ValClass[0] eq '(' ){
# (/abc/) && a=b; (a<b) || a=7
$right_br=matching_br(0);
if( $ValClass[$right_br+1] eq '0' ){
gen_chunk('if ');
$RecursionLevel=-1;
$TrStatus=expression(0); # this will scan till ')'
}elsif( $ValClass[$right_br+1] eq '1' ){
gen_token('if ! ');
$RecursionLevel=-1;
$TrStatus=expression(0); # this will scan till ')'
}elsif( $ValClass[$right_br+1] eq '=' ){
#this is a list assignment like ($i,$k)=@_ or ($i,$k)=split(//,$text)
$RecursionLevel=-1;
$TrStatus=expression(0,$right_br);
gen_chunk($ValPy[$right_br+1]);
$RecursionLevel=-1;
$TrStatus=expression($right_br+1,$#ValClass,0);
}
}elsif( $ValClass[0] eq 'k' ){
# keywords next, last and all other for which we have no special treatment
if( $#ValClass == 0 ){
gen_chunk($ValPy[0]);
}elsif( $ValClass[1] eq '(' ){
$TrStatus=expression(0,$#ValClass,0); # Can be this will scan till ')'
}else{
# last 2
gen_chunk($ValPy[0]);
$TrStatus=expression(1,$#ValClass); # this will scan till ')'
}
}elsif( $ValClass[0] eq 'i' ){
# user defined functon
if( $#ValClass>0 && $ValClass[1] eq '(' ){
$right_br=matching_br(1);
if( $ValClass[2] eq ')' ){
# function with zero arguments
if( $ValPy[0] eq 'main' && $Pythonizer::CurNest==0 ){
my $globals=substr($GlobalVar{$CurSub},length('global'));
$globals=~tr/,/=/;
gen_statement($globals.'=None');
}
gen_chunk($ValPy[0].'([])');
}elsif( $ValClass[2] eq 'f' && ( $ValClass[3] ne '(' || ($ValClass[3] eq '(' && matching_br(3) == $right_br-1)) ){
# bracketless call of built-in function as a single argument: get_config(split / /,$line)
# or bracketed call to built-in function call that return list. We do not need sqare brackets
gen_chunk($ValPy[0],'(');
function(2,$right_br-1); # we assume that evethying in brackets is the function call
gen_chunk(')');
}else{
# In all other cases we will put sqare bracket, even if they are redundant: they can be manually deleted.
gen_chunk($ValPy[0]);
gen_chunk('([');
$TrStatus=expression(2,$#ValClass-1,-1); # this will scan till ')' and should eliminate ')' due to -1 as 3-d arg
gen_chunk('])');
}
}else{
$RecursionLevel=-1;
$TrStatus=expression(0,$#ValClass); # this will scan till ')'
}
}else{
$TrStatus=-1;
}
finish();
} # while
initialize_globals_for_state_vars();
#
# Epilog -- close output file and if you are in debugging mode display the content on the screen
#
if (scalar(@NoTrans)>0) {
say STDERR "\nATTENTION!\nThe following lines were probably translated incorrectly:\n";
say STDERR join("\n",@NoTrans);
}
$rc=summary(); # print diagnostic messages summary
exit $rc;
sub finish
# imitation of continue statement.
{
if( defined($TrStatus) && $TrStatus < 0 ){
push(@NoTrans,"[$.]: $line");
}
gen_statement();
$line=getline(); # get new line
correct_nest();
} # finish
sub rename_state_var
{
($from,$to)=@_;
for( $i=$from; $i<=$to; $i++ ){
if( defined($ValClass[$i]) && $ValClass[$i]=~/[sah]/ && exists($new_state_var_name{$ValPy[$i]}) ){
$ValPy[$i]=$CurSub.'_'.$ValPy[$i];
}
}
}
sub initialize_globals_for_state_vars
{
my @renamed_state_var=values(%new_state_var_name);
return unless( defined($renamed_state_var[0]) ); # nothing to do
# First generate varibles for which we have inialization
for($i=1;$i<@renamed_state_var;$i++){
if( exists($new_state_var_init{$renamed_state_var[$i]}) ){
gen_statement($renamed_state_var[$i].'='.$new_state_var_init{$renamed_state_var[$i]});
}
}
# Now initialize the rest to None
my $first=0;
for( $i=1; $i<@renamed_state_var; $i++){
unless( exists($new_state_var_init{$renamed_state_var[$i]}) ){
if( $first==0 ){
gen_chunk($renamed_state_var[$i]);
$first++;
}else{
gen_chunk('=',$renamed_state_var[$i]);
}
}
}
($first) && gen_chunk('=None');
gen_statement;
}
#
# Print statement for Python 3
#
sub print3
{
my $start=$_[0];
my ($k,$handle);
# end="") instead of trailing comma in Python 2
gen_chunk($ValPy[$start],'(');
if( $ValClass[$start+1] eq 'i' ){
$handle=$ValPy[$start+1];
$k=$start+2;
}else{
$handle='';
$k=$start+1;
}
if( $#ValClass>$k ){
$TrStatus=expression($k,$#ValClass,0);
return -1 if ($TrStatus<0);
}else{
if(length($handle)>0){
gen_chunk("file=$handle)");
}else{
gen_chunk(')');
}
return;
}
if ($ValPerl[$start] eq 'print' && $ValClass[-1] eq '"' ){
if( $Perlscan::PythonCode[-1]=~qr[\\n["']$] ){
substr($Perlscan::PythonCode[-1],-3,2)=''; # Perl print was actually say
}else{
gen_chunk(',end=""');
}
}
if( $handle){
#printing to file handle
gen_chunk(',file=',$handle); # Python 3.x: print('hello world', file=file_object)
}
#say
gen_chunk(')');
return scalar(@ValClass);
} # print3
sub assignment
#
# Analyse and generate code for Perl assignment statement
#
{
my $start=$_[0]; # start of analysys of assignment statement
if( $start<0 || $TrStatus<0 ){
return -255;
}
my $limit=(scalar(@_)>1) ? $_[1] : $#ValClass; # Nov 11, 2019 accept not only the index of the first token, but also index of the last.
my ($k,$split,$post_processing,$comma_pos,$colon_pos,$from,$to);
#
# Assignment with post condition need to be transformed into regular control structure in Python
#
$k=$start;
#
# C-style ++ and --
#
if( $ValClass[$#ValClass] eq '^' ){
if ($#ValClass-$start==1){
gen_chunk($ValPy[$k],$ValPy[$k+1]);
return $#ValClass+1
}else{
replace($#ValClass,'=','=',substr($ValPy[-1],0,2));
append('d','1','1');
$limit+=1;
}
}
#
# We assume this is a regular assignment with "=". Let's analyse the left side.
#
if( ($split=index($TokenStr,'=',$k))>-1 ){
if( $split-$k==1 ){
# single token on the left side -- regular assignment;
gen_chunk($ValPy[$k]); # simple scalar assignment -- varible of left side
}elsif( $ValPerl[$k] eq '(' ){
# brackets on the right side -- we assume that this is the list on the left side
gen_chunk('[');
$k++;
gen_chunk($ValPy[$k]); # first in the cascading assignement
$k++;
while($k<$split ){
# this was we skip delimiters
if( substr($TokenStr,$k,1)=~/^[sha]/ ){
gen_chunk(','.$ValPy[$k]);
}
$k++;
}
gen_chunk(']');
$k++;
}else{
# possibly array with complex subscripts or complex hash key expression
$k=expression($k,$split-1,0); # on the left side it can be array index or something more complex
return -255 if ($k<0);
}
gen_chunk($ValPy[$split]); # generate appropriate operation hidden under generic token '=' ( +=, -=, etc)
if( $limit - $split == 1 ){
# only one token after '='
if ($ValClass[$limit] eq 'x' ) {
gen_chunk(qq{subprocess.run($ValPy[$split+1],capture_output=True,text=True,shell=True)});
gen_statement();
gen_statement(qq{subprocess_rc=$ValPy[$split-1].returncode});
gen_chunk($ValPy[$k]);
gen_chunk($ValPy[$split]);
gen_chunk($ValPy[$k].'.stdout');
}else{
gen_chunk($ValPy[$limit]); # that includes diamond operator <> and <HANDLE> Aug 10,2020
#$is_numeric{$ValPerl[$k]}='d'; # capture the type of variable.
}
return($#ValClass);
}else{
# we have some kind of expression on the right side
if( (substr($TokenStr,$split,2) eq '=(')>-1 && (index($TokenStr,')?',$split))>-1 ){
# this is C-style conditional assigment x=(v>0):y:z;
# Step one analyse the expression in blackets
$to=matching_br($split+1);
($to<0) && return -255;
# Fist we need to generate then part of ternary if expression
$colon_pos=index($TokenStr,':',$to+2);
if( $colon_pos>-1 ){
$k=expression($to+2,$colon_pos-1,0);
return -255 if( $k<0 );
}else{
$k=expression($to+2,$#ValClass,0);
return -255 if( $k<0 );
}
gen_chunk(' if ');
$k=$split+1;
if( $to==$k+2){
$k++;
gen_chunk($ValPy[$k]); # expression consist of one token
$k+=3; # the next symbol after ')?'
}else{
$k=expression($k+1,$to-1,0); # generate conditon without brackets
return -255 if ($k<0);
}
if ($colon_pos>-1){
gen_chunk(' else ');
$k=expression($colon_pos+1,$#ValPerl,0); # up to the very end
return -255 if( $k<0 );
}
gen_statement(); # output if line
return $#ValClass;
}else{
$k=expression($split+1,$limit,0); # process expression without brackets -- last param is 0
return -255 if( $k<0 );
}
}
}elsif( ($split=index($TokenStr,'~',$k))>-1) {
$k=regex_and_translate($start,$k,$split,0);
return $k+1;
}else{
return -255;
}
return($#ValClass);
} # assignment
sub regex_and_translate
#
# process very tricky regex and tranlate function
#
{
my($start,$k,$split)=@_;
if( $ValClass[$split+1] eq 'f' && $ValPerl[$split+1] eq 'tr'){
# tr is a special case -- this is not regular expression
if( $split-$k==1 ){
gen_chunk($ValPy[$split-1],'=',$ValPy[$split-1],'.translate(',$ValPy[$split-1],$ValPy[$split+1],')'); # a=a.trasnlate(a)
}else{
$k=expression($start,$split-1,0); # can be array index or something more problemtic ;-)
return -255 if( $k<0 );
gen_chunk('=');
$k=expression($start,$split-1,0); # replicate the left part of the assignment
gen_chunk('.translate(');
$k=expression($start,$split-1,0); # replicate the left part of the assignment
gen_chunk($ValPy[$split+1],')');
}
# next token
$k=$split+1;
}elsif( ($split=index($TokenStr,'~',$k))>-1 ){
#regular expression $string =~ /cat/ or $string =~m/cat/
# re.search(r'cat', string): ...
if($ValClass[$split+1] eq 'q') {
# match only; There is no variable to assign results
if( substr($ValPy[$split+1],0,1) eq '.' ){
$k=expression($start,$split-1,0); # generate left side of the regular expression
return -255 if( $k<0 );
gen_chunk($ValPy[$split+1],')'); # add dot part generated by scanner
}else{
gen_chunk($ValPy[$split+1]);
$k=expression($start,$split-1,0); # generate left side of the regular expression, it can be array index or something even more problemtic ;-)
return -255 if( $k<0 );
if (index($ValPy[$split+1],':=')>-1){
gen_chunk('))'); # close function bracket and expression
}else{
gen_chunk(')'); # close function bracket and expression
}
}
$k=$split+1;
}elsif( $ValClass[$split+1] eq 'f' && $ValPerl[$split+1] eq 're' ){
# this is case of substirution
if( $split-$k==1 ){
gen_chunk($ValPy[$split-1]); # a
gen_chunk('='); # a=
if( substr($ValPy[$split+1],0,1) eq '.' ){
gen_chunk($ValPy[$split-1].$ValPy[$split+1]); # a=a.find(string)
}else{
gen_chunk("$ValPy[$split+1]$ValPy[$split-1])"); # a=re.sub(rexex,replacement,variable)
}
}else{
$k=expression($start,$split-1,0); # can be array index or something more problemtic ;-)
return -255 if( $k<0 );
gen_chunk('=');
if( substr($ValPy[$split+1],0,1) eq '.' ){
$k=expression($start,$split-1,0); # replicate the left part of the assignment
return -255 if( $k<0 );
gen_chunk($ValPy[$split+1]);
}else{
$k=expression($start,$split-1,0); # replicate the left part of the assignment
return -255 if( $k<0 );
gen_chunk(')');
}
}
# next token
$k=$split+1;
}else{
return -255;
}
}
return $k+1;
}
sub matching_br
# Find matching bracket, arase closeing braket, if found.
# Arg1 - starting position for scan
# Arg2 - (optional) -- balance from whichto start (allows to skip opening brace)
{
my $scan_start=$_[0];
my $balance=(scalar(@_)>1) ? $_[1] : 0; # case where opening bracket is missing for some reason or was skipped.
for( my $k=$scan_start; $k<length($TokenStr); $k++ ){
$s=substr($TokenStr,$k,1);
if( $s eq '(' ){
$balance++;
}elsif( $s eq ')' ){
$balance--;
if( $balance==0 ){
return $k;
}
}
} # for
return $#TokenStr;
} # matching_br
#
# Extration of assignment statement from conditions and other places where Python prohibits them
# Added Nov 11, 2019
#
sub pre_assign
{
my $assign_start=$_[0];
my $assign_end=matching_br($assign_start);
($assign_end<0) && return -255;
assignment($assign_start+1,$assign_end-1);
gen_statement();
#
# remove everytnogh but variable name. we need to shink arrrays
#
my $from=index($TokenStr,'=',$assign_start+2); # "=" now is next to identifier; should be
my $howmany=$assign_end-$from+1; # closed interval
if( $howmany>0 ){
destroy($from,$howmany);
}
# Remove opening bracket -- it is no longer needed
destroy($assign_start,1);
}
#
# Process all control statements
#
sub control
{
my $begin=$_[0];
if( $begin<0 || $TrStatus<0 ){
return -255;
}
my $limit;
my ($hashpos,$end_pos,$end_br_pos,$k,$increment,$tempvar);
$start=$begin+1;
if( $ValPerl[$start] eq '(' ){
$start++;
}
if($ValClass[-1] eq ')' ){
Perlscan::destroy(-1); # eliminate last bracket -- Perl does not enclose controlstatements in bralckts.
$limit=$#ValClass; # exclude it from count
}
if( $ValPerl[$begin] eq 'if' || $ValPerl[$begin] eq 'unless' ){
if( $TokenStr eq 'c(i)') {
# while(<SYSIN>)
gen_chunk("$ValPy[$start] default_var=$ValPy[$start+2]:"); # gen initial keyword
return($#ValClass);
}
gen_chunk($ValPy[$begin]); # gen initial keyword
$k=expression($start,$limit,0); # last bracket was erased.
return -255 if ($k<0);
gen_chunk(':');
return($#ValClass);
}elsif( $ValPerl[$begin] eq 'while' || $ValPerl[$begin] eq 'until' ){
if( $TokenStr eq 'c(s=i)' && substr($ValPerl[4],0,1) eq '<' ) {
gen_chunk("$ValPy[0] $ValPy[2] in $ValPy[4]" );
}elsif( substr($TokenStr,$start) eq 'c(d)' && $ValPy[$start+2]==1 ){
gen_chunk("$ValPy[0] True" ); # while(1) Perl idiom
}elsif( substr($TokenStr,$start) eq 'c(i)' ){
gen_chunk("$ValPy[0] default_var in $ValPy[2]" ); # while(<stdin>)
}else{
gen_chunk($ValPy[$begin]); #while
if ($ValClass[$start] eq '('){
$TrStatus=expression($start,$limit,1); # gen expression
}else{
$TrStatus=expression($start,$limit,0); # gen expression
}
}
gen_chunk(':');
return($#ValClass);
}elsif( $ValPerl[$begin] eq 'for' && $ValPerl[$begin+1] eq '(' && $ValClass[$begin+2] !~ /[ahf]/ ){
# regular for loop but can be foreach loop too
if( $ValPerl[-1] eq '++'){
$increment='';
}elsif( $ValPerl[-1] eq '--'){
$increment='-1';
}else{
logme('S', "In the current version more complex increment than ++ or -- requires manual translation");
$TrStatus=-1;
return -255;
}
$start=$begin;
gen_chunk($ValPy[$start]);
if ($ValClass[$start+2] eq ';'){
gen_chunk($ValPy[$start+3],' in range(',$ValPy[$start+3]);
$end_pos=$start+2;
}else{
gen_chunk($ValPy[$start+2]); # index var
gen_chunk('in range(');
$start=index($TokenStr,'=',$start); # find initialization. BTW it can be expression
if( $start == -1 ){$TrStatus=-1; return -255;}
$start++;
# find end of initialization
$end_pos=next_same_level_token(';',$start,$limit); # end of expression
if( $end_pos-$start==1 ){
gen_chunk($ValPy[$start]);
}else{
$TrStatus=expression($start,$end_pos-1,0); # gen expression
if( $TrStatus < -1 ){return -255;}
}
}
gen_chunk(',');
#
# Analize loop exit condition
#
$start=index($TokenStr,'>',$end_pos); # fron last ;
if( $start == -1 ){$TrStatus=-1; return -255; }
$start++;
# find end of loopexit condition
$end_pos=next_same_level_token(';',$start,$limit);
if( $end_pos == -1 ){$TrStatus=-1; return -255; }
if( $end_pos-$start==1 ){
if($ValClass[$start] eq 'a'){
gen_chunk($ValPy[$start]); # array as limit of the range
}else{
gen_chunk($ValPy[$start]); # all other cases of single limit of the range
}
}else{
$TrStatus=expression($start,$end_pos-1); # gen expression for the limit of the range
return -255 if ($TrStatus<0);
}
# we already got increamnt at the begining
if( $increment) {
gen_chunk(",$increment):");
}else{
gen_chunk('):');
}
return($#ValClass);
}elsif( $ValPerl[$begin] eq 'for' || $ValPerl[$begin] eq 'foreach' ){
gen_chunk($ValPy[$begin]);
if ($ValClass[$start] eq 's'){
gen_chunk($ValPy[$start].' in ');
}else{
gen_chunk('default_var in ');
}
$start=index($TokenStr,'(',$start);
if( substr($TokenStr,$start) eq '(a)') {
# loop over an array
gen_chunk($ValPy[$start+1]);