summaryrefslogtreecommitdiff
path: root/searx/metrology/error_recorder.py
blob: f533e4e8b34471c70cc8bbd0c89d7a70ba5cc76d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import typing
import inspect
import logging
from json import JSONDecodeError
from urllib.parse import urlparse
from requests.exceptions import RequestException
from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
                              SearxEngineAccessDeniedException)
from searx import logger


# Import-time side effect: request a basic root-logger configuration at INFO level.
# NOTE(review): logging.basicConfig() is documented to do nothing when the root logger
# already has handlers - confirm that configuring logging from this module is intended.
logging.basicConfig(level=logging.INFO)

# Error counters: engine name -> {ErrorContext: number of occurrences}.
# Filled by add_error_context().
errors_per_engines = {}


class ErrorContext:
    """Hashable description of where and why an engine error happened.

    Instances are used as dictionary keys to count identical errors, so two
    contexts are equal (and hash alike) exactly when all of their fields are equal.

    Fields:
        filename: file of the frame the error is attributed to.
        function: name of the function of that frame.
        line_no: line number inside ``filename``.
        code: source line of that frame.
        exception_classname: class name of the recorded exception, or ``None``
            when the error was recorded from a log message.
        log_message: message of a recorded error, or ``None`` for exceptions.
        log_parameters: extra details; must be hashable (a tuple) because it
            takes part in ``__hash__``.
    """

    __slots__ = 'filename', 'function', 'line_no', 'code', 'exception_classname', 'log_message', 'log_parameters'

    def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters):
        self.filename = filename
        self.function = function
        self.line_no = line_no
        self.code = code
        self.exception_classname = exception_classname
        self.log_message = log_message
        self.log_parameters = log_parameters

    def _as_tuple(self):
        """Return every field, in constructor order (single source for eq / hash / repr)."""
        return (self.filename, self.function, self.line_no, self.code, self.exception_classname,
                self.log_message, self.log_parameters)

    def __eq__(self, o) -> bool:
        if not isinstance(o, ErrorContext):
            return False
        return self._as_tuple() == o._as_tuple()

    def __hash__(self):
        return hash(self._as_tuple())

    def __repr__(self):
        # All seven fields are shown, in constructor order: ``function`` is part of the
        # identity of a context, so leaving it out would make distinct contexts
        # indistinguishable in the logs.
        return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format(*self._as_tuple())


def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
    """Count one more occurrence of ``error_context`` for ``engine_name``.

    The per-engine counter dict is created in ``errors_per_engines`` on first use,
    and the recorded context is written to the debug log.
    """
    counters = errors_per_engines.setdefault(engine_name, {})
    if error_context in counters:
        counters[error_context] += 1
    else:
        counters[error_context] = 1
    logger.debug('%s: %s', engine_name, str(error_context))


def get_trace(traces):
    """Pick the frame record an error should be attributed to.

    The records are scanned from the last one backwards; the first one whose file
    sits directly inside a ``searx/engines`` or ``searx/search/processors``
    directory is returned.  When none matches, the last record is returned.
    """
    for candidate in reversed(traces):
        # directory components of the file, without the file name itself
        parent_dirs = candidate.filename.split('/')[:-1]
        in_engines = parent_dirs[-2:] == ['searx', 'engines']
        in_processors = parent_dirs[-3:] == ['searx', 'search', 'processors']
        if in_engines or in_processors:
            return candidate
    return traces[-1]


def get_hostname(exc: RequestException) -> typing.Optional[str]:
    """Return the network location of the URL involved in ``exc``.

    The URL of the request is preferred; the URL of the response is used when the
    exception carries no request or the request has no URL.

    Returns:
        The ``netloc`` part of that URL, or ``None`` when no URL is available.
    """
    url = None
    # The exception may carry no request at all, so guard before reading its URL
    # (same lookup order as get_request_exception_messages).
    if exc.request is not None:
        url = exc.request.url
    if url is None and exc.response is not None:
        url = exc.response.url
    if url is None:
        # Nothing to parse: handing None to urlparse would not yield a str hostname.
        return None
    return urlparse(url).netloc


def get_request_exception_messages(exc: RequestException)\
        -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]:
    """Summarise a request exception as ``(status_code, reason, hostname)``.

    ``hostname`` is the netloc of the request URL, falling back to the response URL.
    ``status_code`` (as a string) and ``reason`` come from the response.  Every
    element that cannot be determined is ``None``.
    """
    request = exc.request
    url = request.url if request is not None else None

    response = exc.response
    if url is None and response is not None:
        url = response.url

    hostname = None if url is None else str(urlparse(url).netloc)

    if response is None:
        return (None, None, hostname)
    return (str(response.status_code), response.reason, hostname)


def get_messages(exc, filename) -> typing.Tuple:
    """Extract the loggable details of ``exc`` as a tuple.

    ``filename`` is the file the exception was raised in; it is only used to
    recognise ``ValueError`` coming from lxml.  The checks are ordered: the first
    matching exception type decides the result.  Unknown exception types give an
    empty tuple.
    """
    if isinstance(exc, JSONDecodeError):
        messages = (exc.msg, )
    elif isinstance(exc, TypeError) or (isinstance(exc, ValueError) and 'lxml' in filename):
        messages = (str(exc), )
    elif isinstance(exc, RequestException):
        messages = get_request_exception_messages(exc)
    elif isinstance(exc, (SearxXPathSyntaxException, SearxEngineXPathException)):
        messages = (exc.xpath_str, exc.message)
    elif isinstance(exc, SearxEngineAPIException):
        messages = (str(exc.args[0]), )
    elif isinstance(exc, SearxEngineAccessDeniedException):
        messages = (exc.message, )
    else:
        messages = ()
    return messages


def get_exception_classname(exc: Exception) -> str:
    """Return the class name of ``exc``, prefixed with its module unless it is a builtin.

    For example ``ValueError`` stays ``ValueError`` while a ``json`` decoding error
    becomes ``json.decoder.JSONDecodeError``.
    """
    klass = exc.__class__
    class_name = klass.__qualname__
    module_name = klass.__module__
    # module that hosts the builtin types
    builtins_module = str.__class__.__module__
    if module_name is not None and module_name != builtins_module:
        return module_name + '.' + class_name
    return class_name


def get_error_context(framerecords, exception_classname, log_message, log_parameters) -> ErrorContext:
    """Build the ErrorContext for the most relevant frame of ``framerecords``.

    Args:
        framerecords: frame records (as returned by ``inspect.trace()`` or
            ``inspect.stack()``); the frame to report is selected by ``get_trace``.
        exception_classname: class name of the exception, or ``None``.
        log_message: message of the error, or ``None``.
        log_parameters: extra, hashable details of the error.

    Returns:
        An ErrorContext holding the file name, function name, line number and
        stripped source line of the selected frame plus the given details.  The
        source line is ``''`` when the source of the frame is not available.
    """
    searx_frame = get_trace(framerecords)
    # inspect sets code_context to None when the source cannot be read (e.g. the
    # module ships without its .py file); recording an error must not fail on that.
    context_lines = searx_frame.code_context
    code = context_lines[0].strip() if context_lines else ''
    # drop the local reference to the frame records as soon as they are no longer needed
    del framerecords
    return ErrorContext(searx_frame.filename, searx_frame.function, searx_frame.lineno, code,
                        exception_classname, log_message, log_parameters)


def record_exception(engine_name: str, exc: Exception) -> None:
    """Record ``exc`` as an error of the engine ``engine_name``.

    The traceback is read with ``inspect.trace()``, i.e. from the exception that is
    currently being handled, so this is meant to be called inside an ``except`` block.
    """
    frames = inspect.trace()
    try:
        context = get_error_context(
            frames,
            get_exception_classname(exc),
            None,
            # index 1 of a frame record is its file name; the last record is
            # the frame where the exception was raised
            get_messages(exc, frames[-1][1]),
        )
        add_error_context(engine_name, context)
    finally:
        # do not keep the frame records referenced from this frame
        del frames


def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
    """Record an error described by ``log_message`` for the engine ``engine_name``.

    The context is taken from the call stack of the caller.  ``log_parameters``
    defaults to an empty tuple.
    """
    # Call stack without the frame of this function, reversed so that the direct
    # caller comes last, the same orientation as the records of inspect.trace().
    caller_frames = inspect.stack()[:0:-1]
    try:
        parameters = log_parameters or ()
        context = get_error_context(caller_frames, None, log_message, parameters)
        add_error_context(engine_name, context)
    finally:
        # do not keep the frame records referenced from this frame
        del caller_frames