Commit 23457bbb authored by dasharatha.vamshi's avatar dasharatha.vamshi

init

parent f9c3c459
# Default ignored files
/shelf/
/workspace.xml
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyChainedComparisonsInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoreConstantInTheMiddle" value="true" />
</inspection_tool>
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="55">
<item index="0" class="java.lang.String" itemvalue="scikit-image" />
<item index="1" class="java.lang.String" itemvalue="scipy" />
<item index="2" class="java.lang.String" itemvalue="opencv-python" />
<item index="3" class="java.lang.String" itemvalue="torch" />
<item index="4" class="java.lang.String" itemvalue="torchvision" />
<item index="5" class="java.lang.String" itemvalue="absl-py" />
<item index="6" class="java.lang.String" itemvalue="protobuf" />
<item index="7" class="java.lang.String" itemvalue="rsa" />
<item index="8" class="java.lang.String" itemvalue="tensorflow-estimator" />
<item index="9" class="java.lang.String" itemvalue="opt-einsum" />
<item index="10" class="java.lang.String" itemvalue="python-dateutil" />
<item index="11" class="java.lang.String" itemvalue="cycler" />
<item index="12" class="java.lang.String" itemvalue="gast" />
<item index="13" class="java.lang.String" itemvalue="numpy" />
<item index="14" class="java.lang.String" itemvalue="pyasn1" />
<item index="15" class="java.lang.String" itemvalue="requests" />
<item index="16" class="java.lang.String" itemvalue="importlib-metadata" />
<item index="17" class="java.lang.String" itemvalue="pyasn1-modules" />
<item index="18" class="java.lang.String" itemvalue="requests-oauthlib" />
<item index="19" class="java.lang.String" itemvalue="tensorflow" />
<item index="20" class="java.lang.String" itemvalue="tensorboard-plugin-wit" />
<item index="21" class="java.lang.String" itemvalue="zipp" />
<item index="22" class="java.lang.String" itemvalue="oauthlib" />
<item index="23" class="java.lang.String" itemvalue="astunparse" />
<item index="24" class="java.lang.String" itemvalue="urllib3" />
<item index="25" class="java.lang.String" itemvalue="pyparsing" />
<item index="26" class="java.lang.String" itemvalue="Cython" />
<item index="27" class="java.lang.String" itemvalue="Markdown" />
<item index="28" class="java.lang.String" itemvalue="google-auth-oauthlib" />
<item index="29" class="java.lang.String" itemvalue="Werkzeug" />
<item index="30" class="java.lang.String" itemvalue="kiwisolver" />
<item index="31" class="java.lang.String" itemvalue="tqdm" />
<item index="32" class="java.lang.String" itemvalue="yolov5processor" />
<item index="33" class="java.lang.String" itemvalue="tensorboard" />
<item index="34" class="java.lang.String" itemvalue="future" />
<item index="35" class="java.lang.String" itemvalue="matplotlib" />
<item index="36" class="java.lang.String" itemvalue="cachetools" />
<item index="37" class="java.lang.String" itemvalue="grpcio" />
<item index="38" class="java.lang.String" itemvalue="Keras" />
<item index="39" class="java.lang.String" itemvalue="google-auth" />
<item index="40" class="java.lang.String" itemvalue="idna" />
<item index="41" class="java.lang.String" itemvalue="Pillow" />
<item index="42" class="java.lang.String" itemvalue="cython" />
<item index="43" class="java.lang.String" itemvalue="keras" />
<item index="44" class="java.lang.String" itemvalue="imgaug" />
<item index="45" class="java.lang.String" itemvalue="opencv-contrib-python" />
<item index="46" class="java.lang.String" itemvalue="paho-mqtt" />
<item index="47" class="java.lang.String" itemvalue="pymongo" />
<item index="48" class="java.lang.String" itemvalue="fbprophet" />
<item index="49" class="java.lang.String" itemvalue="scikit-learn" />
<item index="50" class="java.lang.String" itemvalue="sklearn" />
<item index="51" class="java.lang.String" itemvalue="statsmodels" />
<item index="52" class="java.lang.String" itemvalue="python-dotenv" />
<item index="53" class="java.lang.String" itemvalue="pystan" />
<item index="54" class="java.lang.String" itemvalue="Cerberus" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N802" />
<option value="N806" />
<option value="N801" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="cv2.imresize" />
<option value="detectron2.model_zoo.get_config_file" />
<option value="detectron2.model_zoo.get_checkpoint_url" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/preprocess-data.iml" filepath="$PROJECT_DIR$/.idea/preprocess-data.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
# Container image for the pre-processing component.
FROM python:3.7-slim
# COPY is the plain "copy local files" instruction; ADD's extra URL/archive
# handling is not needed for the build context.
COPY . /opt
WORKDIR /opt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
# Exec form starts python directly instead of through /bin/sh -c, so the
# process receives the container's stop signals itself.
CMD ["python", "main.py"]
#---------------Service Configurations----------------#
SERVICE_CONFIG:
  LOG_LEVEL: info
  LOG_HANDLER_NAME: PreProccessComponent
  LOGSTASH_HOST: 192.168.1.47
  LOGSTASH_PORT: 5000
2021-02-23 17:56:17,606 INFO PreProccessComponent Got the data writing it to pickle file (preprocessed_X.pkl)
2021-02-23 17:56:17,606 INFO PreProccessComponent Reading Json file
2021-02-23 17:56:17,622 INFO PreProccessComponent Parsing the pickle file
2021-02-23 17:59:10,742 INFO PreProccessComponent Reading Json file
2021-02-23 17:59:10,760 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:01:29,652 INFO PreProccessComponent Reading Json file
2021-02-23 18:01:29,671 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:02:08,816 INFO PreProccessComponent Reading Json file
2021-02-23 18:02:08,827 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:02:40,875 INFO PreProccessComponent Reading Json file
2021-02-23 18:02:40,887 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:04:47,559 INFO PreProccessComponent Reading Json file
2021-02-23 18:04:47,570 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:06:03,437 INFO PreProccessComponent Reading Json file
2021-02-23 18:06:03,448 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:06:26,273 INFO PreProccessComponent Reading Json file
2021-02-23 18:06:26,282 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:06:49,400 INFO PreProccessComponent Reading Json file
2021-02-23 18:06:49,412 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:07:24,418 INFO PreProccessComponent Reading Json file
2021-02-23 18:07:24,430 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:08:25,242 INFO PreProccessComponent Reading Json file
2021-02-23 18:08:25,253 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:09:21,818 INFO PreProccessComponent Reading Json file
2021-02-23 18:09:21,830 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:12:27,419 INFO PreProccessComponent Reading Json file
2021-02-23 18:12:27,438 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:12:27,444 INFO PreProccessComponent Got the data writing it to pickle file (preprocessed_X.pkl)
2021-02-23 18:12:27,445 INFO PreProccessComponent Component executed Successfully
2021-02-23 18:13:15,958 INFO PreProccessComponent Reading Json file
2021-02-23 18:13:15,978 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:13:15,986 INFO PreProccessComponent Got the data writing it to pickle file (preprocessed_X.pkl)
2021-02-23 18:13:15,991 INFO PreProccessComponent Component executed Successfully
2021-02-23 18:13:32,970 INFO PreProccessComponent Reading Json file
2021-02-23 18:13:32,985 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:13:32,993 INFO PreProccessComponent Got the data writing it to pickle file (preprocessed_X.pkl)
2021-02-23 18:13:32,994 INFO PreProccessComponent Component executed Successfully
2021-02-23 18:14:52,633 INFO PreProccessComponent Reading Json file
2021-02-23 18:14:52,645 INFO PreProccessComponent Parsing the pickle file
2021-02-23 18:14:52,650 INFO PreProccessComponent Got the data writing it to pickle file (preprocessed_X.pkl)
2021-02-23 18:14:52,650 INFO PreProccessComponent Component executed Successfully
import requests
from scripts.common.logsetup import logger
from scripts.common.constants import PreProcessConstants, ComponentExceptions
from scripts.common.config_parser import *
import pickle
import pandas as pd
import json
import numpy as np
from sklearn.preprocessing import StandardScaler
class PreProcessComponent:
    """Build a scaled feature matrix from a weather-forecast JSON file.

    Each instance keeps one list per forecast field (``tempmax``, ``tempmin``,
    ``temp``, ``humidity``, ``precip``, ``windspeed``, ``winddir``,
    ``cloudcover``, ``visibility``), plus ``date`` (the raw datetime strings)
    and ``ds`` (the number parsed from the second ``-``-separated part of each
    datetime string). The lists hold the rows read by the most recent
    :meth:`preprocess` call.
    """

    # Names of the per-day list attributes created on every instance.
    _COLUMN_ATTRS = (
        "ds", "tempmax", "tempmin", "temp", "humidity", "precip",
        "winddir", "cloudcover", "visibility", "date", "windspeed",
    )

    def __init__(self):
        self._reset_columns()

    def _reset_columns(self):
        """Bind a fresh empty list to every per-day column attribute."""
        for attr in self._COLUMN_ATTRS:
            setattr(self, attr, [])

    def read_pickle_file(self, file):
        """Load and return the object pickled at ``file``.

        :param file: path of the pickle file to read.
        :return: the unpickled object.
        """
        logger.info("Parsing the pickle file")
        # NOTE(review): pickle.load can execute arbitrary code; only point this
        # at pickle files that come from a trusted source.
        with open(file, "rb") as pickle_file:
            return pickle.load(pickle_file)

    def preprocess(self, forcast_json, standard_scalar_pkl_path, null_value_method):
        """Read the forecast, fill missing values and apply the pickled scaler.

        :param forcast_json: path of the forecast JSON file; it must contain a
            ``days`` list whose entries carry the fields named in
            ``PreProcessConstants``.
        :param standard_scalar_pkl_path: path of the pickled scaler object; the
            object only needs a ``transform`` method (presumably a fitted
            sklearn ``StandardScaler`` - confirm against the training code).
        :param null_value_method: fill method for missing values, e.g.
            ``"ffill"`` or ``"bfill"``.
        :return: whatever ``transform`` returns for the assembled frame.
        :raises KeyError: if ``days`` or one of the per-day fields is missing.
        """
        logger.info("Reading Json file")
        with open(forcast_json) as f:
            data = json.load(f)

        # Start from empty columns so that a second call on the same instance
        # does not append its rows to those of the previous call.
        self._reset_columns()
        for day in data[PreProcessConstants.DAYS_KEY]:
            stamp = day[PreProcessConstants.DATETIME_KEY]
            self.date.append(stamp)
            # assumes the datetime string looks like YYYY-MM-DD, making the
            # second field the month - TODO confirm against the forecast API
            self.ds.append(int(stamp.split("-")[1]))
            self.tempmax.append(day[PreProcessConstants.TEMPMAX_KEY])
            self.tempmin.append(day[PreProcessConstants.TEMPMIN_KEY])
            self.temp.append(day[PreProcessConstants.TEMP_KEY])
            self.humidity.append(day[PreProcessConstants.HUMIDITY_KEY])
            self.precip.append(day[PreProcessConstants.PRECIP_KEY])
            self.windspeed.append(day[PreProcessConstants.WINDSPEED_KEY])
            self.winddir.append(day[PreProcessConstants.WINDDIR_KEY])
            self.cloudcover.append(day[PreProcessConstants.CLOUDCOVER_KEY])
            self.visibility.append(day[PreProcessConstants.VISIBILITY_KEY])

        # Column order is kept exactly as before: the scaler is applied to the
        # frame as a whole, so the order is part of the contract.
        forcast_data = pd.DataFrame({
            "Maximum Temperature": self.tempmax,
            "Minimum Temperature": self.tempmin,
            "Temperature": self.temp,
            "Precipitation": self.precip,
            "Wind Speed": self.windspeed,
            "Wind Direction": self.winddir,
            "Visibility": self.visibility,
            "Cloud Cover": self.cloudcover,
            "Relative Humidity": self.humidity,
            "month": self.ds,
        })

        # fillna(method=...) is deprecated in newer pandas releases; use the
        # dedicated ffill()/bfill() for the methods they cover and keep the
        # original call for any other value.
        if null_value_method in ("ffill", "pad"):
            forcast_data = forcast_data.ffill()
        elif null_value_method in ("bfill", "backfill"):
            forcast_data = forcast_data.bfill()
        else:
            forcast_data.fillna(method=null_value_method, inplace=True)

        sc_X = self.read_pickle_file(standard_scalar_pkl_path)
        X = sc_X.transform(forcast_data)
        return X
if __name__ == '__main__':
    # Script entry point: validate the settings, run the pre-processing and
    # pickle the result into the shared volume.
    #
    # Each setting must be present AND non-empty. A bare membership test is not
    # enough: the visible config parser builds `config` with
    # os.environ.get(...), so a key can exist with the value None.

    # Checking shared Volume
    shared_volume = config.get(PreProcessConstants.SHARED_VOLUME)
    if not shared_volume:
        raise Exception(ComponentExceptions.INVALID_SHARED_VOLUME)

    # Checking json path
    json_path = config.get(PreProcessConstants.JSON_PATH)
    if not json_path:
        raise Exception(ComponentExceptions.INVALID_JSON_PATH)

    # Checking pickle path for standard scalar
    standard_scalar_path = config.get(PreProcessConstants.STANDARD_SCALAR_PATH)
    if not standard_scalar_path:
        # Previously reported the JSON-path message for this setting.
        raise Exception(ComponentExceptions.INVALID_Standard_Scalar_Pkl_Path)

    # Checking Fill NA method
    fillna_method = config.get(PreProcessConstants.FILLNA_METHOD)
    if not fillna_method:
        raise Exception(ComponentExceptions.INVALID_Fillna_Method)

    obj = PreProcessComponent()
    data = obj.preprocess(json_path, standard_scalar_path, fillna_method)
    logger.info("Got the data writing it to pickle file (preprocessed_X.pkl)")
    try:
        # `os` is only in scope through the star import of the config parser.
        # The context manager closes the file even when pickling fails.
        with open(os.path.join(shared_volume, 'preprocessed_X.pkl'), 'wb') as output:
            pickle.dump(data, output)
        logger.info("Component executed Successfully")
    except Exception as e:
        # Same exception type as before, now chained to keep the original traceback.
        raise Exception(e) from e
requests~=2.24.0
pyyaml~=5.3.1
python-logstash-async
pandas~=1.1.1
\ No newline at end of file
#!/usr/bin/env python
import os
import sys
import yaml
import json
# Service settings are read from conf/configuration.yml under the current
# working directory; the process exits with status 1 when the file is missing.
config_path = os.path.join(os.getcwd(), "conf", "configuration.yml")
if os.path.exists(config_path):
    sys.stderr.write("Reading config from --> {}".format(config_path))
    sys.stderr.write("\n")
    with open(config_path, 'r') as stream:
        # An empty YAML document loads as None; fall back to {} so the
        # lookups below keep working.
        _config = yaml.safe_load(stream) or {}
else:
    sys.stderr.write("Configuration not found...")
    sys.stderr.write("\n")
    sys.stderr.write("Exiting....")
    sys.stderr.write("\n")
    sys.exit(1)

# A SERVICE_CONFIG key that is present but empty loads as None as well.
_service_config = _config.get('SERVICE_CONFIG') or {}

# Log directory under the current working directory. exist_ok avoids the
# check-then-create race of exists() followed by mkdir().
BASE_LOG_PATH = os.path.join(os.getcwd(), "logs")
os.makedirs(BASE_LOG_PATH, exist_ok=True)

# The LOG_LEVEL environment variable takes precedence over the YAML value.
LOG_LEVEL = os.environ.get("LOG_LEVEL", _service_config.get("LOG_LEVEL", "INFO")).upper()
LOG_HANDLER_NAME = _service_config.get("LOG_HANDLER_NAME", "PreProccessComponent")
ENABLE_LOGSTASH_LOG = os.environ.get("ENABLE_LOGSTASH_LOG", 'False').lower()
LOGSTASH_HOST = _service_config.get('LOGSTASH_HOST')
# Kept as a string (the text "None" when unset), as importers expect.
LOGSTASH_PORT = str(_service_config.get('LOGSTASH_PORT'))

# Example values for a local run:
# os.environ["shared_volume"] = "test"
# os.environ["json_path"] = r"E:\welspun-defects\preprocess_data-ilendev\response.json"
# os.environ["fillna_method"] = "ffill"
# os.environ["standard_scalar_path"] = r"E:\welspun-defects\preprocess_data-ilendev\StandardScaler.pkl"

# Component settings come from environment variables; an unset variable yields
# None, except fillna_method which defaults to "ffill".
config = {
    "shared_volume": os.environ.get("shared_volume"),
    "json_path": os.environ.get("json_path"),
    "fillna_method": os.environ.get("fillna_method", default="ffill"),
    "standard_scalar_path": os.environ.get("standard_scalar_path"),
}

# os.path.exists(None) raises TypeError, so the directory bootstrap is skipped
# when the variable is unset instead of crashing at import time.
if config['shared_volume'] and not os.path.exists(config['shared_volume']):
    sys.stderr.write("Shared path does not exist!")
    sys.stderr.write("\n")
    sys.stderr.write("Creating path --> {}".format(config['shared_volume']))
    sys.stderr.write("\n")
    os.makedirs(config['shared_volume'], exist_ok=True)
#!/usr/bin/env python
class PreProcessConstants:
    """String and numeric constants shared by the pre-processing component."""

    # Keys of the runtime `config` mapping.
    SHARED_VOLUME = "shared_volume"
    JSON_PATH = "json_path"
    FILLNA_METHOD = "fillna_method"
    STANDARD_SCALAR_PATH = "standard_scalar_path"

    # Keys read from the forecast JSON: the list of days and its per-day fields.
    DAYS_KEY = "days"
    DATETIME_KEY = "datetime"
    TEMPMAX_KEY = "tempmax"
    TEMPMIN_KEY = "tempmin"
    TEMP_KEY = "temp"
    HUMIDITY_KEY = "humidity"
    PRECIP_KEY = "precip"
    WINDSPEED_KEY = "windspeed"
    WINDDIR_KEY = "winddir"
    CLOUDCOVER_KEY = "cloudcover"
    VISIBILITY_KEY = "visibility"

    # HTTP-related values; not referenced by the visible code.
    REQUEST_SUCCESS_CODE = 200
    ERRORS_KEY = "errors"
    Default_headers = {"content-type": "application/json"}
    HTTP = "http://"

    # Two-slot template: each "{}" follows a rule of 25 '#' characters on its own line.
    LOG_VAR_MESSAGE = "\n{rule}\n{{}}\n{rule}\n{{}}\n".format(rule="#" * 25)
class ComponentExceptions:
    """Messages used when a required component setting is missing."""

    # One message per required setting.
    INVALID_SHARED_VOLUME = 'Shared Volume is required'
    INVALID_JSON_PATH = 'Path to Json file is required'
    INVALID_Standard_Scalar_Pkl_Path = 'Path to Standard Scalar Pkl file is needed'
    INVALID_Fillna_Method = 'Method to fill NA values is required'
import os
import logging
from logging.handlers import RotatingFileHandler
from logstash_async.handler import AsynchronousLogstashHandler
from scripts.common.config_parser import LOG_LEVEL, LOG_HANDLER_NAME, BASE_LOG_PATH
from scripts.common.config_parser import LOG_LEVEL, LOG_HANDLER_NAME, BASE_LOG_PATH, LOGSTASH_HOST, LOGSTASH_PORT, ENABLE_LOGSTASH_LOG
# Register a custom TRACE level five below DEBUG and expose its number as
# `logging.trace`.
logging.trace = logging.DEBUG - 5
logging.addLevelName(logging.trace, 'TRACE')

# Record layouts: the debug layout adds thread, file, function and line number.
DEFAULT_FORMAT = '%(asctime)s %(levelname)5s %(name)s %(message)s'
DEBUG_FORMAT = (
    '%(asctime)s %(levelname)5s %(name)s '
    '[%(threadName)5s:%(filename)5s:%(funcName)5s():%(lineno)s] '
    '%(message)s '
)
EXTRA = {}

# Use the detailed layout only when the configured level is exactly DEBUG.
FORMATTER = DEBUG_FORMAT if LOG_LEVEL.strip() == "DEBUG" else DEFAULT_FORMAT
class ILensLogger(logging.getLoggerClass()):
def __init__(self, name):
super().__init__(name)
def trace(self, msg, *args, **kwargs):
if self.isEnabledFor(logging.trace):
self._log(logging.trace, msg, args, **kwargs)
def get_logger(log_handler_name):
    """
    Purpose : To create logger .

    The logger writes to the console and to a rotating file
    ``<BASE_LOG_PATH>/<log_handler_name>.log`` (10485760 bytes per file, 5
    backups). A Logstash handler is added when ENABLE_LOGSTASH_LOG is 'true'
    and a host and numeric port are configured. Calling this again for a name
    that already has handlers returns the existing logger without adding more.

    :param log_handler_name: Name of the log handler.
    :return: logger object.
    """
    # Normalise once so the logger and the console handler get the same level
    # name; the raw value would raise ValueError if it carried whitespace.
    level = LOG_LEVEL.strip().upper()

    # Side effect: loggers created after this call are ILensLogger instances.
    logging.setLoggerClass(ILensLogger)
    _logger = logging.getLogger(log_handler_name)
    _logger.setLevel(level)

    if _logger.handlers:
        # Already configured by an earlier call; attaching the handlers again
        # would emit every record more than once.
        return _logger

    formatter = logging.Formatter(FORMATTER)

    log_handler = logging.StreamHandler()
    log_handler.setLevel(level)
    log_handler.setFormatter(formatter)

    log_path = os.path.join(BASE_LOG_PATH, log_handler_name + ".log")
    handler = RotatingFileHandler(log_path, maxBytes=10485760,
                                  backupCount=5)
    handler.setFormatter(formatter)

    _logger.addHandler(log_handler)
    _logger.addHandler(handler)

    if ENABLE_LOGSTASH_LOG == 'true' and LOGSTASH_PORT is not None and LOGSTASH_HOST is not None and LOGSTASH_PORT.isdigit():
        _logger.addHandler(AsynchronousLogstashHandler(LOGSTASH_HOST, int(LOGSTASH_PORT), database_path=None))
    return _logger
# Module-level logger, created once at import time and named after LOG_HANDLER_NAME.
logger = get_logger(LOG_HANDLER_NAME)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment