Fix type annotation
This commit is contained in:
parent
68e25b3a84
commit
2a09c82a50
|
|
@ -67,7 +67,7 @@ class TelegrafExporter():
|
||||||
self.source = source
|
self.source = source
|
||||||
|
|
||||||
def telegraf_post(self, timestamp:int, create_time: int, title:str,
|
def telegraf_post(self, timestamp:int, create_time: int, title:str,
|
||||||
location:str, count:int)-> requests.Response:
|
metric:str, count:int)-> requests.Response:
|
||||||
""" Post a value to telegraf
|
""" Post a value to telegraf
|
||||||
:param timestamp: timestamp used by influxdb as time field.
|
:param timestamp: timestamp used by influxdb as time field.
|
||||||
:param create_time: second of the day at which the data point is exported
|
:param create_time: second of the day at which the data point is exported
|
||||||
|
|
@ -80,7 +80,7 @@ class TelegrafExporter():
|
||||||
"timestamp": timestamp,
|
"timestamp": timestamp,
|
||||||
"create_time": create_time,
|
"create_time": create_time,
|
||||||
"source": self.source,
|
"source": self.source,
|
||||||
"location": location,
|
"location": metric,
|
||||||
"hits": count}
|
"hits": count}
|
||||||
return requests.post(self.telegraf_url,
|
return requests.post(self.telegraf_url,
|
||||||
json=payload,
|
json=payload,
|
||||||
|
|
@ -89,8 +89,8 @@ class TelegrafExporter():
|
||||||
|
|
||||||
def export_result_to_telegraf(self, page_hits: VisitDict,
|
def export_result_to_telegraf(self, page_hits: VisitDict,
|
||||||
bot_hits: VisitDict,
|
bot_hits: VisitDict,
|
||||||
user_agents: VisitDict,
|
user_agents: Dict[str, int],
|
||||||
methods: Counter,
|
methods: Counter[str],
|
||||||
timestamp: int) -> None:
|
timestamp: int) -> None:
|
||||||
""" Export the bot_hits and page_hits dictionaries to telegraf
|
""" Export the bot_hits and page_hits dictionaries to telegraf
|
||||||
"""
|
"""
|
||||||
|
|
@ -125,32 +125,32 @@ class TelegrafExporter():
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
# export user agent variety
|
# export user agent variety
|
||||||
name="user_agent_variety"
|
name="user_agent_variety"
|
||||||
for ua_type, uas in user_agents.items():
|
for metric_name, count in user_agents.items():
|
||||||
try:
|
try:
|
||||||
response = self.telegraf_post(timestamp,
|
response = self.telegraf_post(timestamp,
|
||||||
create_time,
|
create_time,
|
||||||
name,
|
name,
|
||||||
ua_type,
|
metric_name,
|
||||||
uas)
|
count)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except requests.exceptions.RequestException as excpt:
|
except requests.exceptions.RequestException as excpt:
|
||||||
print(excpt)
|
print(excpt)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
# export method variety
|
# export method variety
|
||||||
name="method_variety"
|
name="method_variety"
|
||||||
for method, count in methods.items():
|
for metric_name, count in methods.items():
|
||||||
try:
|
try:
|
||||||
response = self.telegraf_post(timestamp,
|
response = self.telegraf_post(timestamp,
|
||||||
create_time,
|
create_time,
|
||||||
name,
|
name,
|
||||||
method,
|
metric_name,
|
||||||
count)
|
count)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except requests.exceptions.RequestException as excpt:
|
except requests.exceptions.RequestException as excpt:
|
||||||
print(excpt)
|
print(excpt)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def get_crawler_patterns(exclude_crawler: bool) -> List[str]:
|
def get_crawler_patterns(exclude_crawler: bool) -> List[re.Pattern[str]]:
|
||||||
""" Parse the crawler-user-agent file, and returns a list
|
""" Parse the crawler-user-agent file, and returns a list
|
||||||
of compiled regex crawler patterns
|
of compiled regex crawler patterns
|
||||||
"""
|
"""
|
||||||
|
|
@ -192,8 +192,9 @@ def get_locations(sitemap_path:str) -> List[str]:
|
||||||
|
|
||||||
|
|
||||||
def parse_logfile(logfile_path: str, locations: List[str],
|
def parse_logfile(logfile_path: str, locations: List[str],
|
||||||
crawler_patterns: List[str]) -> Tuple[VisitDict, VisitDict,
|
crawler_patterns: List[re.Pattern[str]]) -> Tuple[VisitDict, VisitDict,
|
||||||
VisitDict, Dict[str, int]]:
|
VisitDict, Dict[str, int],
|
||||||
|
Counter[str]]:
|
||||||
""" Parse a logfile, and return 4 dicts:
|
""" Parse a logfile, and return 4 dicts:
|
||||||
page_hits, bot_hits, other_hits and additional_infos
|
page_hits, bot_hits, other_hits and additional_infos
|
||||||
"""
|
"""
|
||||||
|
|
@ -213,9 +214,9 @@ def parse_logfile(logfile_path: str, locations: List[str],
|
||||||
visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
|
visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
|
||||||
bot_visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
|
bot_visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
|
||||||
other_visit_dict: VisitDict = defaultdict(set)
|
other_visit_dict: VisitDict = defaultdict(set)
|
||||||
bot_user_agents = set()
|
bot_user_agents: Set[str] = set()
|
||||||
client_user_agents = set()
|
client_user_agents: Set[str] = set()
|
||||||
method_counter = Counter()
|
method_counter: Counter[str] = Counter()
|
||||||
# The way to get the timezone data here is not great (not taking into account DST and such)
|
# The way to get the timezone data here is not great (not taking into account DST and such)
|
||||||
# but it is a fallback default date that should hardly ever be used.
|
# but it is a fallback default date that should hardly ever be used.
|
||||||
last_log_date = datetime.now(datetime.now().astimezone().tzinfo).strftime(time_local_fmt)
|
last_log_date = datetime.now(datetime.now().astimezone().tzinfo).strftime(time_local_fmt)
|
||||||
|
|
@ -267,11 +268,10 @@ def parse_logfile(logfile_path: str, locations: List[str],
|
||||||
microsecond=0)
|
microsecond=0)
|
||||||
additional_infos = {"last_log_timestamp": int(today_date.timestamp()),
|
additional_infos = {"last_log_timestamp": int(today_date.timestamp()),
|
||||||
"bot_user_agents_nb": len(bot_user_agents),
|
"bot_user_agents_nb": len(bot_user_agents),
|
||||||
"client_user_agents_nb": len(client_user_agents),
|
"client_user_agents_nb": len(client_user_agents)}
|
||||||
"methods": method_counter}
|
|
||||||
|
|
||||||
|
|
||||||
return visit_dict, bot_visit_dict, other_visit_dict, additional_infos
|
return visit_dict, bot_visit_dict, other_visit_dict, additional_infos, method_counter
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
|
|
@ -303,7 +303,8 @@ def main() -> None:
|
||||||
# Get parser, get locations and parse the log file
|
# Get parser, get locations and parse the log file
|
||||||
crawler_patterns = get_crawler_patterns(args.exclude_crawler)
|
crawler_patterns = get_crawler_patterns(args.exclude_crawler)
|
||||||
locations = get_locations(args.sitemap)
|
locations = get_locations(args.sitemap)
|
||||||
visit_dict, bot_visit_dict, other_visit_dict, additional_infos = parse_logfile(args.logfile,
|
(visit_dict, bot_visit_dict, other_visit_dict,
|
||||||
|
additional_infos, method_counter) = parse_logfile(args.logfile,
|
||||||
locations,
|
locations,
|
||||||
crawler_patterns)
|
crawler_patterns)
|
||||||
|
|
||||||
|
|
@ -314,7 +315,7 @@ def main() -> None:
|
||||||
print_visit_dict("Bot visits", bot_visit_dict)
|
print_visit_dict("Bot visits", bot_visit_dict)
|
||||||
print(f"There were {additional_infos['bot_user_agents_nb']} unique bot user agent(s)")
|
print(f"There were {additional_infos['bot_user_agents_nb']} unique bot user agent(s)")
|
||||||
print_visit_dict("Other visits", other_visit_dict)
|
print_visit_dict("Other visits", other_visit_dict)
|
||||||
for method, count in additional_infos["methods"].items():
|
for method, count in method_counter.items():
|
||||||
print(f"{method}: {count}")
|
print(f"{method}: {count}")
|
||||||
|
|
||||||
if telegraf_url:
|
if telegraf_url:
|
||||||
|
|
@ -324,9 +325,11 @@ def main() -> None:
|
||||||
source=socket.gethostname())
|
source=socket.gethostname())
|
||||||
exporter.export_result_to_telegraf(visit_dict,
|
exporter.export_result_to_telegraf(visit_dict,
|
||||||
bot_visit_dict,
|
bot_visit_dict,
|
||||||
{"bot_user_agents":additional_infos['bot_user_agents_nb'],
|
{"bot_user_agents":
|
||||||
"client_user_agents": additional_infos['client_user_agents_nb']},
|
additional_infos['bot_user_agents_nb'],
|
||||||
additional_infos["methods"],
|
"client_user_agents":
|
||||||
|
additional_infos['client_user_agents_nb']},
|
||||||
|
method_counter,
|
||||||
additional_infos["last_log_timestamp"])
|
additional_infos["last_log_timestamp"])
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue