Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 0 additions & 117 deletions .gitignore

This file was deleted.

4 changes: 4 additions & 0 deletions src/common/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ class SearchOptions(BaseModel):
days: Optional[int] = Field(default=None, description="Number of days back when the CVEs were last modified", alias="days-back", ge=0)
deprecated: Optional[bool] = Field(default=False, description="If set to true, will fetch only the deprecated CPE names", alias="deprecated")
profile: Optional[bool] = Field(default=None, description="Would also run the profile execution of the search and save the results in a file")
epssScoreGt: Optional[float] = Field(default=None, description="Filter by EPSS score greater than", alias="epss-score-gt", gt=0) # New field for EPSS score greater than
epssScoreLt: Optional[float] = Field(default=None, description="Filter by EPSS score less than", alias="epss-score-lt", gt=0) # New field for EPSS score less than
epssPercGt: Optional[float] = Field(default=None, description="Filter by EPSS percentile greater than", alias="epss-perc-gt", ge=0, le=1) # New field for EPSS percentile greater than
epssPercLt: Optional[float] = Field(default=None, description="Filter by EPSS percentile less than", alias="epss-perc-lt", ge=0, le=1) # New field for EPSS percentile less than
output: OutputType = Field(default=OutputType.json, description="Define the output format")

class Config:
Expand Down
13 changes: 12 additions & 1 deletion src/common/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import re
import json
from typing import List, Iterator
from sqlalchemy import Boolean
from sqlalchemy import Boolean, cast, Numeric
from sqlalchemy.sql import text, expression
from sqlalchemy.orm import aliased
from generic import ApplicationContext
Expand Down Expand Up @@ -40,6 +40,17 @@ def search_cves(appctx: ApplicationContext, opts: SearchOptions):

# prepare the search query
query = session.query(cve_table)
# Filter by EPSS score
if opts.epssScoreGt is not None:
query = query.filter(cast(cve_table.data['metrics']['epss']['score'].astext, Numeric) > opts.epssScoreGt)
if opts.epssScoreLt is not None:
query = query.filter(cast(cve_table.data['metrics']['epss']['score'].astext, Numeric) < opts.epssScoreLt)

# Filter by EPSS percentile
if opts.epssPercGt is not None:
query = query.filter(cast(cve_table.data['metrics']['epss']['percentile'].astext, Numeric) > opts.epssPercGt)
if opts.epssPercLt is not None:
query = query.filter(cast(cve_table.data['metrics']['epss']['percentile'].astext, Numeric) < opts.epssPercLt)

# filter by the cve IDS, either directly specified in the search options
if opts.cveId:
Expand Down
5 changes: 4 additions & 1 deletion src/config/setenv/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ file.max.count = 10
[fetch]

; NIST CVE API
url.cve = https://services.nvd.nist.gov/rest/json/cves/2.0

; NIST CPE API
url.cpe = https://services.nvd.nist.gov/rest/json/cpes/2.0
Expand All @@ -55,6 +55,9 @@ url.cwe = https://cwe.mitre.org/data/xml/views/2000.xml.zip
; MITRE CAPEC source file
url.capec = https://capec.mitre.org/data/xml/views/3000.xml.zip

; Cyentia EPSS source file
url.epss = https://epss.cyentia.com

; API_KEY set the value of API key obtained from NVD
api_key = ${NVD_API_KEY}

Expand Down
111 changes: 93 additions & 18 deletions src/load
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,14 @@ from common.util import init_db_schema
SPLIT_BY_COLUMN = re.compile(r'(?<!\\):')
BATCH_SIZE = 256


class ValidationError(Exception):
    """Raised when CLI arguments or configuration parameters are invalid."""
    pass

class StatusError(Exception):
    """Raised when a fetch-status check cannot be completed as expected."""
    pass

# Retry policy for HTTP downloads: up to 10 attempts with exponential
# backoff, retrying GET requests on throttling (403) and service
# unavailable (503) responses.
# NOTE(review): `method_whitelist` was renamed `allowed_methods` in
# urllib3 1.26 and removed in 2.0 — keep urllib3 pinned < 2.0 or rename
# this argument. The duplicate keyword argument (a SyntaxError) has been
# collapsed to a single occurrence.
http_retry_strategy = Retry(
    total=10,
    backoff_factor=0.5,
    status_forcelist=[403, 503],
    method_whitelist=["GET"],
)

# ------------------------------------------------------------------------------
Expand Down Expand Up @@ -134,6 +133,7 @@ def save_cve_data(appctx, data) -> dict:
db_records = []
db_records_cpes = []
if key in data and data[key]:

for cve in data[key]:
cve = cve['cve']

Expand Down Expand Up @@ -203,14 +203,28 @@ def fetch_data_feed(appctx, data_name, args):

import zipfile
import io
import gzip
import csv
from datetime import date

fetch_data_info = fetch_status(appctx, data_name, args)

if fetch_data_info and not args.full:
print(f"{data_name} data is already present. Loaded on {fetch_data_info['last_modified_date']}: {fetch_data_info['stats']['total_records']} records")
return
if data_name == 'epss' and fetch_data_info['last_modified_date'].date() != date.today():
pass
else:
print(f"{data_name} data is already present. Loaded on {fetch_data_info['last_modified_date']}: {fetch_data_info['stats']['total_records']} records")
return

# download the file
data_url = appctx.config.get_param(f'fetch.url.{data_name}', None)

if data_name == 'epss':
today = datetime.now().astimezone(pytz.timezone('UTC'))
previous_day = today - timedelta(days=1)
# download the EPSS data from https://epss.cyentia.com/epss_scores-2023-12-31.csv.gz using the previous day from the current date.
data_url = f"{data_url}/epss_scores-{previous_day.strftime('%Y-%m-%d')}.csv.gz"

if not data_url: raise ValidationError(f'{data_name} url config param not specified')

rest_session = requests.Session()
Expand Down Expand Up @@ -245,19 +259,24 @@ def fetch_data_feed(appctx, data_name, args):
print(f"Request failed: {e}")
return

# extract the content from the zip (load the xml)
# extract the content from the zip (load the xml or csv)
file_contents = []
if content:
with zipfile.ZipFile(content) as zip_ref:
file_contents = [zip_ref.read(file) for file in zip_ref.namelist()][0:1]
if data_name == 'epss':
epss_response = requests.get(data_url, stream=True)
epss_response.raise_for_status()
else:
with zipfile.ZipFile(content) as zip_ref:
file_contents = [zip_ref.read(file) for file in zip_ref.namelist()][0:1]

# convert to json
data_json = None
if not file_contents:
raise RuntimeError('Could not extract anything from the zip content')
else:
import xmltodict
data_json = xmltodict.parse(file_contents[0], force_list=('xhtml:p',), attr_prefix='')
if data_name != 'epss':
data_json = None
if not file_contents:
raise RuntimeError('Could not extract anything from the zip content')
else:
import xmltodict
data_json = xmltodict.parse(file_contents[0], force_list=('xhtml:p',), attr_prefix='')

# process/clean the CWE json data
def get_cwe_data(item):
Expand All @@ -284,6 +303,26 @@ def fetch_data_feed(appctx, data_name, args):
data=item
)

    # process the EPSS data
    def get_epss_data(item):
        """Parse a gzip-compressed EPSS CSV download into a lookup dict.

        item: streamed HTTP response whose ``.raw`` attribute is the
        compressed file object (presumably a ``requests`` Response fetched
        with ``stream=True`` — TODO confirm against the caller).

        Returns a tuple ``(epss_data, date_value)`` where ``epss_data`` maps
        CVE id -> ``[epss_score, percentile, score_date]`` (all kept as the
        strings read from the CSV) and ``date_value`` is the 'YYYY-MM-DD'
        score date extracted from the file's comment header.
        """
        with gzip.open(item.raw, mode='rt') as csv_file:
            csv_reader = csv.reader(csv_file)

            # Skip the first row as it just contains this info: 'model_version:v2023.03.01' and 'score_date:2024-02-12T00:00:00+0000'
            first_row = next(csv_reader)
            # score_date also needed to be extracted:
            # first_row[1] is e.g. 'score_date:2024-02-12T00:00:00+0000';
            # split(':')[1] yields '2024-02-12T00' and split('T')[0] trims
            # the time fragment, leaving the plain date '2024-02-12'.
            date_string = first_row[1].split(':')[1]
            date_value = date_string.split('T')[0]

            next(csv_reader) # skip the next row which is the header information of csv file

            epss_data = {}
            for row in csv_reader:
                # Each data row is [cve_id, epss_score, percentile]; append
                # the score date so every stored value carries its vintage.
                row.append(date_value)
                epss_data[row[0]] = row[1:4]

            return epss_data, date_value

if data_name == 'cwe':

db_records = list(map(get_cwe_data, data_json.get('Weakness_Catalog', {}).get('Weaknesses', {}).get('Weakness', [])))
Expand All @@ -296,9 +335,13 @@ def fetch_data_feed(appctx, data_name, args):
data_date = data_json.get('Attack_Pattern_Catalog', {}).get('Date', None)
save_data_method = save_capec_data

elif data_name == 'epss':
db_records, data_date = get_epss_data(epss_response)
save_data_method = save_epss_data

else: raise ValidationError(f'Unknown data type to save: <{data_name}>')

# load into Cwe/Capec table
# load into Cwe/Capec table or update Vuln table for epss
save_data_method(appctx, args, db_records)

# Update the stats about retrieved information
Expand Down Expand Up @@ -346,6 +389,33 @@ def save_capec_data(appctx, args, db_records):

db_insert_progress.close()

# ------------------------------------------------------------------------------
# Function to save EPSS data to the database
def save_epss_data(appctx, args, db_records):
    """Update the Vuln table JSON documents with freshly fetched EPSS scores.

    appctx     -- application context providing the DB session
    args       -- parsed CLI arguments (unused; kept so all save_*_data
                  functions share the same signature)
    db_records -- dict mapping CVE id -> [epss_score, percentile, score_date]
                  as produced by get_epss_data()

    CVE ids in the EPSS feed with no matching Vuln row are silently skipped
    (the feed can cover CVEs that are not loaded yet).
    """
    from sqlalchemy.orm import load_only

    with appctx.db as session:

        db_insert_progress_name = 'Vuln db update epss'
        bar_format = '{n_fmt}/{total} {l_bar}{bar}| ({elapsed}/{remaining})'
        db_insert_progress = tqdm(total=len(db_records), bar_format=f'{db_insert_progress_name:<20} {bar_format}', ascii=True)

        # Update in bounded batches so the IN (...) list and the per-flush
        # payload stay a reasonable size.
        BATCH_SIZE_epss = 1024
        vuln_ids = list(db_records.keys())  # hoisted: built once, not per batch
        for batch in [vuln_ids[i:i + BATCH_SIZE_epss] for i in range(0, len(vuln_ids), BATCH_SIZE_epss)]:

            # Load vuln_id alongside data so reading it below does not
            # trigger a lazy column load for every row.
            vuln_query = session.query(Vuln).filter(Vuln.vuln_id.in_(batch)).options(load_only(Vuln.vuln_id, Vuln.data)).all()
            update_values = []

            for vuln_record in vuln_query:
                # Find the corresponding record in the batch
                epss_desired_row = db_records[vuln_record.vuln_id]
                # setdefault guards records whose JSON document has no
                # 'metrics' key yet (older/partial CVE entries would
                # otherwise raise KeyError here).
                vuln_record.data.setdefault('metrics', {})['epss'] = {'score': epss_desired_row[0], 'percentile': epss_desired_row[1], 'date': epss_desired_row[2]}
                update_values.append({'id': vuln_record.id, 'data': vuln_record.data})

            session.bulk_update_mappings(Vuln, update_values)
            db_insert_progress.update(len(batch))

        db_insert_progress.close()

# ------------------------------------------------------------------------------
def fetch_data(appctx, data_name, args):
Expand All @@ -356,7 +426,7 @@ def fetch_data(appctx, data_name, args):
method = save_cve_data
elif data_name == 'cpe':
method = save_cpe_data
elif data_name in ('cwe', 'capec'):
elif data_name in ('cwe', 'capec', 'epss'):
fetch_data_feed(appctx, data_name, args)
return
else:
Expand Down Expand Up @@ -522,7 +592,7 @@ def fetch_status(appctx, name, args, data=None):
# then we need to save/update the data in the fetch status table
if data:

if name == 'cve': table_name = Vuln
if name == 'cve' or name == 'epss': table_name = Vuln
elif name == 'cpe': table_name = Cpe
elif name == 'cwe': table_name = Cwe
elif name == 'capec': table_name = Capec
Expand Down Expand Up @@ -579,7 +649,7 @@ def main():
# --------------------------------------------------------------------------
# Parse the arguments and Validate
parser = argparse.ArgumentParser(description="FastCVE database Loader")
parser.add_argument('-d', '--data', dest='data', nargs='+', required=True, choices=['cpe', 'cve', 'cwe', 'capec'], help='Specify data to be loaded')
parser.add_argument('-d', '--data', dest='data', nargs='+', required=True, choices=['cpe', 'cve', 'cwe', 'capec', 'epss'], help='Specify data to be loaded')
parser.add_argument('--full', dest='full', action='store_true', help='Will consider to fetch all data again')
parser.add_argument('--drop', dest='drop', action='store_true', help='This will drop first existing data')
parser.add_argument('-f', '--from', dest='from_date', action='store', help='From Date YYYY-MM-DD["T"HH:MI:SS]')
Expand Down Expand Up @@ -607,7 +677,12 @@ def main():

validate_opts(args_dict)

# now we need to fetch the data from NVD/NIST/MITRE using the API and populate the DB
# if epss comes before cve (load epss cve), first remove epss and add epss at end (epss must be loaded after cve)
if 'cve' in args.data and 'epss' in args.data:
args.data.remove('epss')
args.data.append('epss')

# now we need to fetch the data from NVD/NIST/MITRE/Cyentia using the API and populate the DB
for data in args.data:
fetch_data(appctx, data, args)

Expand Down
Loading