Fix type annotation

This commit is contained in:
Hugo 2021-10-10 17:04:09 +02:00
parent 68e25b3a84
commit 2a09c82a50
1 changed file with 28 additions and 25 deletions

View File

@ -67,7 +67,7 @@ class TelegrafExporter():
self.source = source self.source = source
def telegraf_post(self, timestamp:int, create_time: int, title:str, def telegraf_post(self, timestamp:int, create_time: int, title:str,
location:str, count:int)-> requests.Response: metric:str, count:int)-> requests.Response:
""" Post a value to telegraf """ Post a value to telegraf
:param timestamp: timestamp used by influxdb as time field. :param timestamp: timestamp used by influxdb as time field.
:param create_time: second of the day at which the data point is exported :param create_time: second of the day at which the data point is exported
@ -80,7 +80,7 @@ class TelegrafExporter():
"timestamp": timestamp, "timestamp": timestamp,
"create_time": create_time, "create_time": create_time,
"source": self.source, "source": self.source,
"location": location, "location": metric,
"hits": count} "hits": count}
return requests.post(self.telegraf_url, return requests.post(self.telegraf_url,
json=payload, json=payload,
@ -89,8 +89,8 @@ class TelegrafExporter():
def export_result_to_telegraf(self, page_hits: VisitDict, def export_result_to_telegraf(self, page_hits: VisitDict,
bot_hits: VisitDict, bot_hits: VisitDict,
user_agents: VisitDict, user_agents: Dict[str, int],
methods: Counter, methods: Counter[str],
timestamp: int) -> None: timestamp: int) -> None:
""" Export the bot_hits and page_hits dictionaries to telegraf """ Export the bot_hits and page_hits dictionaries to telegraf
""" """
@ -125,32 +125,32 @@ class TelegrafExporter():
sys.exit(1) sys.exit(1)
# export user agent variety # export user agent variety
name="user_agent_variety" name="user_agent_variety"
for ua_type, uas in user_agents.items(): for metric_name, count in user_agents.items():
try: try:
response = self.telegraf_post(timestamp, response = self.telegraf_post(timestamp,
create_time, create_time,
name, name,
ua_type, metric_name,
uas) count)
response.raise_for_status() response.raise_for_status()
except requests.exceptions.RequestException as excpt: except requests.exceptions.RequestException as excpt:
print(excpt) print(excpt)
sys.exit(1) sys.exit(1)
# export method variety # export method variety
name="method_variety" name="method_variety"
for method, count in methods.items(): for metric_name, count in methods.items():
try: try:
response = self.telegraf_post(timestamp, response = self.telegraf_post(timestamp,
create_time, create_time,
name, name,
method, metric_name,
count) count)
response.raise_for_status() response.raise_for_status()
except requests.exceptions.RequestException as excpt: except requests.exceptions.RequestException as excpt:
print(excpt) print(excpt)
sys.exit(1) sys.exit(1)
def get_crawler_patterns(exclude_crawler: bool) -> List[str]: def get_crawler_patterns(exclude_crawler: bool) -> List[re.Pattern[str]]:
""" Parse the crawler-user-agent file, and returns a list """ Parse the crawler-user-agent file, and returns a list
of compiled regex crawler patterns of compiled regex crawler patterns
""" """
@ -192,8 +192,9 @@ def get_locations(sitemap_path:str) -> List[str]:
def parse_logfile(logfile_path: str, locations: List[str], def parse_logfile(logfile_path: str, locations: List[str],
crawler_patterns: List[str]) -> Tuple[VisitDict, VisitDict, crawler_patterns: List[re.Pattern[str]]) -> Tuple[VisitDict, VisitDict,
VisitDict, Dict[str, int]]: VisitDict, Dict[str, int],
Counter[str]]:
""" Parse a logfile, and return 4 dicts: """ Parse a logfile, and return 4 dicts:
page_hits, bot_hits, other_hits and additional_infos page_hits, bot_hits, other_hits and additional_infos
""" """
@ -213,9 +214,9 @@ def parse_logfile(logfile_path: str, locations: List[str],
visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations)) visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
bot_visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations)) bot_visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
other_visit_dict: VisitDict = defaultdict(set) other_visit_dict: VisitDict = defaultdict(set)
bot_user_agents = set() bot_user_agents: Set[str] = set()
client_user_agents = set() client_user_agents: Set[str] = set()
method_counter = Counter() method_counter: Counter[str] = Counter()
# The way to get the timezone data here is not great (not taking into account DST and such) # The way to get the timezone data here is not great (not taking into account DST and such)
# but it is a fallback default date that should hardly ever be used. # but it is a fallback default date that should hardly ever be used.
last_log_date = datetime.now(datetime.now().astimezone().tzinfo).strftime(time_local_fmt) last_log_date = datetime.now(datetime.now().astimezone().tzinfo).strftime(time_local_fmt)
@ -267,11 +268,10 @@ def parse_logfile(logfile_path: str, locations: List[str],
microsecond=0) microsecond=0)
additional_infos = {"last_log_timestamp": int(today_date.timestamp()), additional_infos = {"last_log_timestamp": int(today_date.timestamp()),
"bot_user_agents_nb": len(bot_user_agents), "bot_user_agents_nb": len(bot_user_agents),
"client_user_agents_nb": len(client_user_agents), "client_user_agents_nb": len(client_user_agents)}
"methods": method_counter}
return visit_dict, bot_visit_dict, other_visit_dict, additional_infos return visit_dict, bot_visit_dict, other_visit_dict, additional_infos, method_counter
def main() -> None: def main() -> None:
@ -303,7 +303,8 @@ def main() -> None:
# Get parser, get locations and parse the log file # Get parser, get locations and parse the log file
crawler_patterns = get_crawler_patterns(args.exclude_crawler) crawler_patterns = get_crawler_patterns(args.exclude_crawler)
locations = get_locations(args.sitemap) locations = get_locations(args.sitemap)
visit_dict, bot_visit_dict, other_visit_dict, additional_infos = parse_logfile(args.logfile, (visit_dict, bot_visit_dict, other_visit_dict,
additional_infos, method_counter) = parse_logfile(args.logfile,
locations, locations,
crawler_patterns) crawler_patterns)
@ -314,7 +315,7 @@ def main() -> None:
print_visit_dict("Bot visits", bot_visit_dict) print_visit_dict("Bot visits", bot_visit_dict)
print(f"There were {additional_infos['bot_user_agents_nb']} unique bot user agent(s)") print(f"There were {additional_infos['bot_user_agents_nb']} unique bot user agent(s)")
print_visit_dict("Other visits", other_visit_dict) print_visit_dict("Other visits", other_visit_dict)
for method, count in additional_infos["methods"].items(): for method, count in method_counter.items():
print(f"{method}: {count}") print(f"{method}: {count}")
if telegraf_url: if telegraf_url:
@ -324,9 +325,11 @@ def main() -> None:
source=socket.gethostname()) source=socket.gethostname())
exporter.export_result_to_telegraf(visit_dict, exporter.export_result_to_telegraf(visit_dict,
bot_visit_dict, bot_visit_dict,
{"bot_user_agents":additional_infos['bot_user_agents_nb'], {"bot_user_agents":
"client_user_agents": additional_infos['client_user_agents_nb']}, additional_infos['bot_user_agents_nb'],
additional_infos["methods"], "client_user_agents":
additional_infos['client_user_agents_nb']},
method_counter,
additional_infos["last_log_timestamp"]) additional_infos["last_log_timestamp"])
if __name__ == "__main__": if __name__ == "__main__":