Skip to content

Commit b01075c

Browse files
committed
By default suppress boring columns
By default, columns that hold the same value in every row are omitted from the tsv; the name and value of each such field are shown in the console output instead of being written to the file. The --all option can be used to also include those fields in the tsv.
1 parent d546e4f commit b01075c

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

ragability/ragability_2tsv.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ def get_args():
3434
"""
3535
parser = argparse.ArgumentParser(description='Convert json/hjson to tsv')
3636
parser.add_argument('--input', '-i', type=str, help='Input json/hjson file', required=True)
37+
parser.add_argument('--all', '-a', action="store_true",
38+
help='Also include fields which are always empty or have the same value in all records', required=False)
3739
parser.add_argument('--output', '-o', type=str, help='Output tsv file (same as input but with tsv extension)', required=False)
3840
args_tmp = parser.parse_args()
3941
args = {}
@@ -118,6 +120,13 @@ def run(config: dict):
118120
flatdata.append(flatrecord)
119121
# convert to a data frame
120122
df = pd.DataFrame(flatdata)
123+
# Now find all the fields in flatdata that have exactly the same value in every record: for each of these fields
124+
# log the name and value and, unless the "all" option is set, remove the field from the dataframe
125+
for col in df.columns:
126+
if len(df[col].unique()) == 1:
127+
logger.info(f"Field {col} has the same value in all records: >>{df[col].iloc[0]}<<")
128+
if not config["all"]:
129+
df.drop(columns=[col], inplace=True)
121130
logger.info(f"Converted to dataframe with {df.shape[0]} rows and {df.shape[1]} columns")
122131
# Now we have the dataframe, we can write it to the output file
123132
outputfile = config["output"]

ragability/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
import importlib.metadata
2-
__version__ = "0.7.4"
2+
__version__ = "0.7.5"

0 commit comments

Comments
 (0)