diff --git a/get_page_stats.py b/get_page_stats.py
index fdd0734..1cb1515 100755
--- a/get_page_stats.py
+++ b/get_page_stats.py
@@ -67,7 +67,7 @@ class TelegrafExporter():
         self.source = source

     def telegraf_post(self, timestamp:int, create_time: int, title:str,
-                      location:str, count:int)-> requests.Response:
+                      metric:str, count:int)-> requests.Response:
         """ Post a value to telegraf
         :param timestamp: timestamp used by influxdb as time field.
         :param create_time: second of the day at which the data point is exported
@@ -80,7 +80,7 @@ class TelegrafExporter():
                    "timestamp": timestamp,
                    "create_time": create_time,
                    "source": self.source,
-                   "location": location,
+                   "location": metric,
                    "hits": count}
         return requests.post(self.telegraf_url,
                              json=payload,
@@ -89,8 +89,8 @@ class TelegrafExporter():
     def export_result_to_telegraf(self,
                                   page_hits: VisitDict,
                                   bot_hits: VisitDict,
-                                  user_agents: VisitDict,
-                                  methods: Counter,
+                                  user_agents: Dict[str, int],
+                                  methods: Counter[str],
                                   timestamp: int) -> None:
         """ Export the bot_hits and page_hits dictionnaries to telegraf
         """
@@ -125,32 +125,32 @@ class TelegrafExporter():
                 sys.exit(1)
         # export user agent variety
         name="user_agent_variety"
-        for ua_type, uas in user_agents.items():
+        for metric_name, count in user_agents.items():
            try:
                response = self.telegraf_post(timestamp,
                                              create_time,
                                              name,
-                                             ua_type,
-                                             uas)
+                                             metric_name,
+                                             count)
                response.raise_for_status()
            except requests.exceptions.RequestException as excpt:
                print(excpt)
                sys.exit(1)
         # export method variety
         name="method_variety"
-        for method, count in methods.items():
+        for metric_name, count in methods.items():
            try:
                response = self.telegraf_post(timestamp,
                                              create_time,
                                              name,
-                                             method,
+                                             metric_name,
                                              count)
                response.raise_for_status()
            except requests.exceptions.RequestException as excpt:
                print(excpt)
                sys.exit(1)

-def get_crawler_patterns(exclude_crawler: bool) -> List[str]:
+def get_crawler_patterns(exclude_crawler: bool) -> List[re.Pattern[str]]:
     """ Parse the crawler-user-agent file, and returns a list of compiled regex
     crawler patterns
     """
@@ -192,8 +192,9 @@ def get_locations(sitemap_path:str) -> List[str]:


 def parse_logfile(logfile_path: str, locations: List[str],
-                  crawler_patterns: List[str]) -> Tuple[VisitDict, VisitDict,
-                                                        VisitDict, Dict[str, int]]:
-    """ Parse a logfile, and return 4 dicts: page_hits, bot_hits, other_hits
-    and additional_infos
+                  crawler_patterns: List[re.Pattern[str]]) -> Tuple[VisitDict, VisitDict,
+                                                                    VisitDict, Dict[str, int],
+                                                                    Counter[str]]:
+    """ Parse a logfile, and return the page_hits, bot_hits and other_hits dicts,
+    the additional_infos dict and a Counter of the request methods
     """
@@ -213,9 +214,9 @@ def parse_logfile(logfile_path: str, locations: List[str],
     visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
     bot_visit_dict: VisitDict = dict(map(lambda x: (x, set()), locations))
     other_visit_dict: VisitDict = defaultdict(set)
-    bot_user_agents = set()
-    client_user_agents = set()
-    method_counter = Counter()
+    bot_user_agents: Set[str] = set()
+    client_user_agents: Set[str] = set()
+    method_counter: Counter[str] = Counter()
     # The way to get the timezone data here is not great (not taking into account DST and such)
     # but it is a fallback default date that should hardly ever be used.
     last_log_date = datetime.now(datetime.now().astimezone().tzinfo).strftime(time_local_fmt)
@@ -267,11 +268,10 @@ def parse_logfile(logfile_path: str, locations: List[str],
                                        microsecond=0)

     additional_infos = {"last_log_timestamp": int(today_date.timestamp()),
                         "bot_user_agents_nb": len(bot_user_agents),
-                        "client_user_agents_nb": len(client_user_agents),
-                        "methods": method_counter}
+                        "client_user_agents_nb": len(client_user_agents)}

-    return visit_dict, bot_visit_dict, other_visit_dict, additional_infos
+    return visit_dict, bot_visit_dict, other_visit_dict, additional_infos, method_counter


 def main() -> None:
@@ -303,9 +303,10 @@ def main() -> None:
     # Get parser, get locations and parse the log file
     crawler_patterns = get_crawler_patterns(args.exclude_crawler)
     locations = get_locations(args.sitemap)
-    visit_dict, bot_visit_dict, other_visit_dict, additional_infos = parse_logfile(args.logfile,
-                                                                                   locations,
-                                                                                   crawler_patterns)
+    (visit_dict, bot_visit_dict, other_visit_dict,
+     additional_infos, method_counter) = parse_logfile(args.logfile,
+                                                       locations,
+                                                       crawler_patterns)

     # Generate the report
     print_visit_dict("Standard visits", visit_dict)
@@ -314,7 +315,7 @@ def main() -> None:
     print_visit_dict("Bot visits", bot_visit_dict)
     print(f"There were {additional_infos['bot_user_agents_nb']} unique bot user agent(s)")
     print_visit_dict("Other visits", other_visit_dict)
-    for method, count in additional_infos["methods"].items():
+    for method, count in method_counter.items():
         print(f"{method}: {count}")

     if telegraf_url:
@@ -324,9 +325,11 @@ def main() -> None:
                                     source=socket.gethostname())
         exporter.export_result_to_telegraf(visit_dict,
                                            bot_visit_dict,
-                                           {"bot_user_agents":additional_infos['bot_user_agents_nb'],
-                                            "client_user_agents": additional_infos['client_user_agents_nb']},
-                                           additional_infos["methods"],
+                                           {"bot_user_agents":
+                                            additional_infos['bot_user_agents_nb'],
+                                            "client_user_agents":
+                                            additional_infos['client_user_agents_nb']},
+                                           method_counter,
                                            additional_infos["last_log_timestamp"])

 if __name__ == "__main__":