Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/file-format.md
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,13 @@ ampersands, and hyphens. Numeric variables can express a single number or a
range and contain only integers, but may contain negative numbers. Numeric variables can have a non-numeric prefix and suffix.

```yaml
page-range: S10-15
page-range: S10-15 # Page S10 to 15
```

Note that if you specify a number together with an affix that itself contains
digits, the whole variable will be interpreted as a [string](#string) instead.
This improves the rendering of atypical page numbers like `11E201`, where the
suffix `E201` contains digits.

#### Unicode Language Identifier

A [Unicode Language Identifier](https://unicode.org/reports/tr35/tr35.html#unicode_language_id) identifies a language or its variants. At its simplest, you can specify an all-lowercase [two-letter ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) like `en` or `es` as a language. It is possible to specify regions, scripts, or variants to more precisely identify a variety of a language, especially in cases where the ISO 639-1 code is considered a "macrolanguage" (`zh` includes both Cantonese and Mandarin). In such cases, specify values like `en-US` for American English or `zh-Hans-CN` for Mandarin written in simplified script in mainland China. The region tags must be written in all caps and mostly correspond to [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements) codes.
Expand Down
4 changes: 4 additions & 0 deletions src/csl/rendering/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,10 @@ impl<T: EntryLike> Iterator for BranchConditionIter<'_, '_, T> {
self.ctx.resolve_number_variable(var),
Some(NumberVariableResult::Regular(MaybeTyped::Typed(_)))
),
Variable::Page(var) => matches!(
self.ctx.resolve_page_variable(var),
Some(MaybeTyped::Typed(_))
),
_ => false,
})
} else {
Expand Down
34 changes: 34 additions & 0 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,12 @@ mod tests {
assert!(val.suffix.is_none());
assert_eq!(&val.to_string(), "-5");

let val = Numeric::from_str("01").unwrap();
assert!(val.value == NumericValue::Number(1));
assert_eq!(val.prefix_str(), Some("0"));
assert!(val.suffix.is_none());
assert_eq!(&val.to_string(), "01");

let val = Numeric::from_str("1st").unwrap();
assert!(val.value == NumericValue::Number(1));
assert!(val.prefix.is_none());
Expand Down Expand Up @@ -606,6 +612,34 @@ mod tests {
assert!(Numeric::from_str("2nd edition").is_err());
}

#[test]
fn test_preserve_space_separator() {
    // Regression test for:
    // https://github.com/typst/hayagriva/issues/312
    // https://github.com/typst/hayagriva/issues/440
    //
    // Whether a value ends up typed or as a plain string is irrelevant here;
    // the only requirement is that whitespace between affixes and numbers
    // survives a parse/format round trip.
    for serial in ["ISO/IEC 23009-1:2022(E)", "GB/T 7714—2025", "GB/T 7714", "第 6 册"] {
        let parsed: MaybeTyped<Numeric> = MaybeTyped::infallible_from_str(serial);
        assert_eq!(parsed.to_string(), serial);
    }

    // GB standards recommend the em dash as the separator, but hyphen-minus
    // and en dash must be supported as well. The output is normalized to the
    // em dash before comparing, so any of the three dashes is accepted.
    let hyphen_minus = "-";
    let en_dash = "–";
    let em_dash = "—";
    let expected = format!("GB/T 7714{em_dash}2015");
    for dash in [hyphen_minus, en_dash, em_dash] {
        let input = format!("GB/T 7714{dash}2015");
        let parsed: MaybeTyped<Numeric> = MaybeTyped::infallible_from_str(&input);
        let normalized = parsed
            .to_string()
            .replace(hyphen_minus, em_dash)
            .replace(en_dash, em_dash);
        assert_eq!(normalized, expected);
    }
}

#[test]
#[cfg(feature = "biblatex")]
fn test_issue_227() {
Expand Down
31 changes: 25 additions & 6 deletions src/types/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,30 @@ impl FromStr for Numeric {

fn from_str(value: &str) -> Result<Self, Self::Err> {
let mut s = Scanner::new(value);
let prefix =
s.eat_while(|c: char| !c.is_numeric() && !c.is_whitespace() && c != '-');
s.eat_whitespace();

let prefix = {
// Eat non-numeric characters and leading zeros.
let start = s.cursor();
s.eat_while(|c: char| !c.is_numeric() && c != '-');
let zeros = s.eat_while('0');
if !zeros.is_empty() && s.peek().is_none_or(|c| !c.is_numeric()) {
// Uneat the last zero if the value is just zero.
s.uneat();
}
s.from(start)
};

let value = number(&mut s).ok_or(NumericError::NoNumber)?;
s.eat_whitespace();
let space_after_value = s.eat_whitespace();

let value = match s.peek() {
Some(c) if is_delimiter(c) => {
s.eat();
s.eat_until(|c: char| !is_delimiter(c));
let mut items = vec![(value, Some(NumericDelimiter::try_from(c)?))];
loop {
s.eat_whitespace();
let num = number(&mut s).ok_or(NumericError::NoNumber)?;
s.eat_whitespace();
match NumericDelimiter::from_str(s.eat_while(is_delimiter)) {
Expand All @@ -276,7 +288,7 @@ impl FromStr for Numeric {
_ => NumericValue::Number(value),
};
s.eat_whitespace();
let post = s.eat_while(|c: char| !c.is_whitespace());
let post = s.eat_while(|c: char| !c.is_numeric() && !c.is_whitespace());

if !s.after().is_empty() {
return Err(NumericError::UnexpectedCharactersAfterPostfix);
Expand All @@ -289,7 +301,11 @@ impl FromStr for Numeric {
} else {
Some(Box::new(prefix.to_string()))
},
suffix: if post.is_empty() { None } else { Some(Box::new(post.to_string())) },
suffix: if post.is_empty() {
None
} else {
Some(Box::new(format!("{space_after_value}{post}")))
},
})
}
}
Expand Down Expand Up @@ -324,8 +340,11 @@ pub enum NumericError {
MissingDelimiter,
}

/// Eat a number from the scanner, assuming leading whitespace and zeros have
/// already been eaten.
///
/// The number can be positive, negative, or zero.
fn number(s: &mut Scanner) -> Option<i32> {
s.eat_whitespace();
let negative = s.eat_if('-');
let num = s.eat_while(|c: char| c.is_numeric());
if num.is_empty() {
Expand Down
27 changes: 27 additions & 0 deletions src/types/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ where

#[cfg(test)]
mod test {
use super::*;

#[test]
fn group_by() {
fn group(s: &str) -> Vec<&'_ str> {
Expand All @@ -410,4 +412,29 @@ mod test {
assert_eq!(["–a", ","], group("–a,").as_slice());
assert_eq!(["a–", ",", "–b"], group("a–,–b").as_slice());
}

#[test]
fn nonnumeric_page() {
// https://github.com/typst/hayagriva/issues/170
for s in &["11E201", "1.36"] {
Copy link
Copy Markdown
Contributor Author

@YDX-2147483647 YDX-2147483647 Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CSL spec says nothing about 1.36, and citeproc-js (citext) thinks it's non-numeric.

图片
Test code
#import "@local/citext:0.3.0": *

#let source = ```bib
@book{a,
  title = {A},
  pages =  {1a},
}
@book{b,
  title = {B},
  pages =  {1.36},
}
```.text

#let csl = ```xml
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="note" version="1.0" name-as-sort-order="all" sort-separator=" " demote-non-dropping-particle="never" initialize-with=" " initialize-with-hyphen="false" page-range-format="expanded" default-locale="zh-CN">
  <info>
    <id />
    <title />
    <updated>2009-08-10T04:49:00+09:00</updated>
  </info>
  <citation>
    <layout/>
  </citation>
  <bibliography>
    <layout delimiter="; ">
      <group delimiter=" ">
        <text variable="title"/>
        <text variable="page"/>
        <choose>
          <if is-numeric="page">
            <text value="(is-numeric)"/>
          </if>
          <else>
            <text value="(non-numeric)"/>
          </else>
        </choose>
      </group>
    </layout>
  </bibliography>
</style>
```.text

#let bib = init-citation(source, mode: "stable", csl: csl)
#show: show-extcite.with(bib: bib, gen-id: true)

- @a
- @b

#show bibliography: none
#bibliography(bytes(source))

#extbib(bib)

let n: MaybeTyped<PageRanges> = MaybeTyped::infallible_from_str(s);
assert_eq!(n, MaybeTyped::String(s.to_string()));
}

// Page ranges should still be parsed as numeric values.
assert_eq!(
MaybeTyped::<PageRanges>::infallible_from_str("S10-15"),
MaybeTyped::Typed(PageRanges::new(vec![PageRangesPart::Range(
Numeric::from_str("S10").unwrap(),
Numeric::from_str("15").unwrap(),
)]))
);
assert_eq!(
MaybeTyped::<PageRanges>::infallible_from_str("011-012"),
MaybeTyped::Typed(PageRanges::new(vec![PageRangesPart::Range(
Numeric::from_str("011").unwrap(),
Numeric::from_str("012").unwrap(),
)]))
);
}
}
2 changes: 2 additions & 0 deletions tests/citeproc-pass.txt
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ nameorder_ShortNameAsSortDemoteNever
namespaces_NonNada3
number_FailingDelimiters
number_IsNumericWithAlpha
number_LeadingZeros
number_MixedPageRange
number_MixedText
number_PageFirst
number_PageRange
number_SimpleNumberArabic
Expand Down