Skip to content

Plan & query

Describe a search before running it, and build field-tagged Boolean queries.

scopus_query

scopus_query(*terms, op='AND', field=None)

Combine terms into one Scopus query, optionally field-wrapping each.

Parameters:

Name Type Description Default
*terms str

One or more non-empty search terms.

()
op str

The boolean operator joining the terms: "AND", "OR" or "AND NOT".

'AND'
field str | None

An optional field tag applied to every term (see FIELD_TAGS below).

None
Source code in src/scopusflow/query.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def scopus_query(*terms: str, op: str = "AND", field: str | None = None) -> str:
    """Combine terms into one Scopus query, optionally field-wrapping each.

    Parameters
    ----------
    *terms:
        One or more non-empty search terms.
    op:
        The boolean operator joining the terms: ``"AND"``, ``"OR"`` or ``"AND NOT"``.
    field:
        An optional field tag applied to every term (see :data:`FIELD_TAGS`).
    """
    if op not in {"AND", "OR", "AND NOT"}:
        raise ValueError("op must be one of 'AND', 'OR', 'AND NOT'.")
    cleaned = [t.strip() for t in terms]
    if not cleaned or any(not t for t in cleaned):
        raise ValueError("All terms must be non-empty.")
    return f" {op} ".join(wrap_field(t, field) for t in cleaned)

wrap_field

wrap_field(query, field)

Wrap query in a field tag, e.g. TITLE-ABS-KEY(graphene).

Source code in src/scopusflow/query.py
26
27
28
29
30
31
32
33
def wrap_field(query: str, field: str | None) -> str:
    """Wrap ``query`` in a field tag, e.g. ``TITLE-ABS-KEY(graphene)``."""
    if field is None:
        return query
    field = field.strip().upper()
    if not _FIELD_RE.match(field):
        raise ValueError(f"Invalid field tag {field!r}; use letters and hyphens only.")
    return f"{field}({query})"

FIELD_TAGS module-attribute

# Field tags mapped to a short description of what each one searches.
FIELD_TAGS = {
    "TITLE": "Words in the document title",
    "TITLE-ABS-KEY": "Title, abstract and keywords",
    "TITLE-ABS-KEY-AUTH": "Title, abstract, keywords and author names",
    "ABS": "Abstract text",
    "KEY": "Indexed and author keywords",
    "AUTH": "Author names",
    "AUTHKEY": "Author-supplied keywords",
    "AFFIL": "Affiliation, any part",
    "AFFILORG": "Affiliation organisation name",
    "SRCTITLE": "Source (publication) title",
    "DOI": "Digital Object Identifier",
    "ALL": "All available fields",
}

SearchPlan dataclass

A fully specified, inspectable description of a Scopus search.

Separating the description of a search from its execution makes a workflow reproducible and lets a large retrieval be partitioned by year, so it can be cached and resumed.

Source code in src/scopusflow/plan.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
@dataclass(frozen=True)
class SearchPlan:
    """An immutable, inspectable specification of a Scopus search.

    Keeping the *description* of a search separate from its *execution* makes
    a workflow reproducible, and allows a large retrieval to be split into
    per-year cells that can be cached and resumed.

    Construction raises ``ValueError`` when ``query`` is empty or blank, when
    ``view`` or ``partition`` is not one of the accepted values, or when
    ``partition="year"`` is requested without any ``years``.
    """

    query: str  # the search text, before any field wrapping
    years: Optional[Sequence[int]] = None  # years to cover; None means no date limit
    field: Optional[str] = None  # optional field tag applied via wrap_field
    view: str = "STANDARD"  # "STANDARD" or "COMPLETE"
    partition: str = "none"  # "none" or "year"

    def __post_init__(self) -> None:
        """Validate the field values right after construction."""
        if not (self.query and self.query.strip()):
            raise ValueError("query must be a non-empty string.")

        allowed_views = {"STANDARD", "COMPLETE"}
        if self.view not in allowed_views:
            raise ValueError("view must be 'STANDARD' or 'COMPLETE'.")

        allowed_partitions = {"none", "year"}
        if self.partition not in allowed_partitions:
            raise ValueError("partition must be 'none' or 'year'.")

        # A per-year split needs at least one year to split on.
        if self.partition == "year" and not self.years:
            raise ValueError("partition='year' requires years.")

    @property
    def wrapped_query(self) -> str:
        """The query passed through ``wrap_field`` with this plan's field."""
        return wrap_field(self.query, self.field)

    def cells(self) -> list[PlanCell]:
        """Expand the plan into the cells that will be fetched.

        With ``partition="year"`` there is one cell per distinct year, in
        ascending order and numbered from 1. Otherwise a single cell is
        returned whose date is ``None``, a single year, or a ``"lo-hi"``
        range spanning the smallest and largest year.
        """
        wrapped = self.wrapped_query

        if self.partition != "year":
            if not self.years:
                span = None
            else:
                first, last = min(self.years), max(self.years)
                span = f"{first}-{last}" if first != last else str(first)
            return [PlanCell(1, wrapped, span, None, self.view)]

        # Duplicates are dropped and the years sorted before numbering.
        ordered = sorted(set(self.years))  # type: ignore[arg-type]
        return [
            PlanCell(number, wrapped, str(year), year, self.view)
            for number, year in enumerate(ordered, start=1)
        ]

cells

cells()

Expand the plan into the cells that will be fetched.

Source code in src/scopusflow/plan.py
51
52
53
54
55
56
57
58
59
60
61
62
63
def cells(self) -> list[PlanCell]:
    """Expand the plan into the cells that will be fetched.

    With ``partition="year"`` there is one cell per distinct year, in
    ascending order and numbered from 1. Otherwise a single cell is returned
    whose date is ``None``, a single year, or a ``"lo-hi"`` range spanning
    the smallest and largest year.
    """
    wrapped = self.wrapped_query

    if self.partition != "year":
        if not self.years:
            span = None
        else:
            first, last = min(self.years), max(self.years)
            span = f"{first}-{last}" if first != last else str(first)
        return [PlanCell(1, wrapped, span, None, self.view)]

    # Duplicates are dropped and the years sorted before numbering.
    ordered = sorted(set(self.years))  # type: ignore[arg-type]
    return [
        PlanCell(number, wrapped, str(year), year, self.view)
        for number, year in enumerate(ordered, start=1)
    ]

PlanCell dataclass

One unit of work in a SearchPlan.

Source code in src/scopusflow/plan.py
11
12
13
14
15
16
17
18
19
@dataclass(frozen=True)
class PlanCell:
    """One unit of work in a :class:`SearchPlan`.

    Instances are immutable. The field notes below describe the values that
    ``SearchPlan.cells`` puts in each field.
    """

    # 1-based position of this cell within the expanded plan.
    cell: int
    # The plan's query after field wrapping.
    query: str
    # Date restriction: a single year, a "lo-hi" range, or None when the plan
    # has no years.
    date: Optional[str]
    # The year this cell covers when the plan is partitioned by year,
    # otherwise None.
    year: Optional[int]
    # The plan's view, copied through unchanged.
    view: str