Some initial work on xml parsing

This commit is contained in:
Lennart
2024-11-24 15:09:34 +01:00
parent 7dfa0c9062
commit a9ef680c30
14 changed files with 1132 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
use core::panic;
use heck::{ToKebabCase, ToPascalCase};
use quote::ToTokens;
use strum::EnumString;
use syn::{
punctuated::Punctuated, token::Comma, Attribute, Expr, ExprLit, Lit, LitByteStr, LitStr, Meta,
};
// Identifier of the helper attribute whose arguments are parsed here, i.e. `#[xml(...)]`.
const ATTR_SCOPE: &str = "xml";
/// Options read from the `#[xml(...)]` attribute of an enum variant.
#[derive(Default)]
pub struct VariantAttrs {
/// Value of `rename = "..."`, if present.
pub rename: Option<String>,
/// Value of `ns = "..."`, if present.
pub ns: Option<String>,
}
/// Collects the arguments of every `#[xml(...)]` attribute in `attrs`.
///
/// The arguments of all matching attributes are merged, in declaration order,
/// into a single comma-separated list of meta items, so
/// `#[xml(a)] #[xml(b = "c")]` is treated like `#[xml(a, b = "c")]`.
///
/// Returns `None` when `attrs` contains no `#[xml(...)]` attribute at all.
///
/// # Panics
///
/// Panics if the arguments of a matching attribute cannot be parsed as a
/// comma-separated list of meta items.
pub fn get_scoped_attrs(attrs: &[Attribute]) -> Option<Punctuated<Meta, Comma>> {
    let mut merged: Option<Punctuated<Meta, Comma>> = None;
    for attr in attrs.iter().filter(|attr| attr.path().is_ident(ATTR_SCOPE)) {
        let parsed = attr
            .parse_args_with(Punctuated::<Meta, Comma>::parse_terminated)
            .unwrap_or_else(|err| panic!("invalid #[{}(...)] attribute: {}", ATTR_SCOPE, err));
        // Create the list lazily so that "no attribute" stays distinguishable
        // (`None`) from "attribute without arguments" (`Some(empty)`).
        merged.get_or_insert_with(Punctuated::new).extend(parsed);
    }
    merged
}
/// Parses the `#[xml(...)]` options attached to an enum variant.
///
/// Recognised options are `ns = "..."` and `rename = "..."`; both require a
/// string literal value. Variants without an `#[xml(...)]` attribute yield
/// the default (all options unset).
///
/// # Panics
///
/// Panics on bare flags, list-style options, unknown option names, and on
/// `ns`/`rename` values that are not string literals.
pub fn parse_variant_attrs(attrs: &[Attribute]) -> VariantAttrs {
    let mut variant_attrs = VariantAttrs::default();
    for meta in get_scoped_attrs(attrs).into_iter().flatten() {
        match meta {
            // No single-flag options exist for variants.
            Meta::Path(path) => {
                panic!("unrecognized variant flag: {}", path.to_token_stream());
            }
            Meta::List(list) => {
                panic!("list-type attrs not supported: {}", list.to_token_stream());
            }
            Meta::NameValue(name_value) => {
                let is_ns = name_value.path.is_ident("ns");
                let is_rename = name_value.path.is_ident("rename");
                if !is_ns && !is_rename {
                    panic!(
                        "unrecognized variant attribute: {}",
                        name_value.to_token_stream()
                    );
                }
                // Reject non-string values instead of silently dropping the option.
                let value = match &name_value.value {
                    Expr::Lit(ExprLit {
                        lit: Lit::Str(lit_str),
                        ..
                    }) => lit_str.value(),
                    _ => panic!(
                        "variant attribute value must be a string literal: {}",
                        name_value.to_token_stream()
                    ),
                };
                if is_ns {
                    variant_attrs.ns = Some(value);
                } else {
                    variant_attrs.rename = Some(value);
                }
            }
        }
    }
    variant_attrs
}
/// Naming conventions that identifiers can be converted to.
///
/// The `strum` `serialize` strings below are the textual names of the variants
/// used by the derived `EnumString` implementation.
#[derive(EnumString)]
pub enum CaseStyle {
/// `kebab-case`
#[strum(serialize = "kebab-case")]
KebabCase,
/// `PascalCase`
#[strum(serialize = "PascalCase")]
PascalCase,
}
impl CaseStyle {
fn transform_text(&self, input: &str) -> String {
match self {
Self::KebabCase => input.to_kebab_case(),
Self::PascalCase => input.to_pascal_case(),
}
}
}
/// Container-level options for enums.
///
/// NOTE(review): nothing visible in this file fills these fields yet;
/// `parse_enum_attrs` currently returns the default value.
#[derive(Default)]
pub struct EnumAttrs {
// presumably the case conversion applied to variant names -- TODO confirm once used
pub case_style: Option<CaseStyle>,
// presumably toggles strict namespace matching -- TODO confirm once used
pub ns_strict: bool,
}
/// Placeholder for parsing enum-level `#[xml(...)]` options.
///
/// No option is read from `attrs` yet; the default `EnumAttrs` is returned.
fn parse_enum_attrs(attrs: &[Attribute]) -> EnumAttrs {
    // The input is intentionally ignored until enum-level options exist.
    let _ = attrs;
    EnumAttrs::default()
}
/// Options read from the `#[xml(...)]` attribute of a struct.
#[derive(Default)]
pub struct StructAttrs {
/// Byte-string literal given as `root = b"..."`, if present.
pub root: Option<LitByteStr>,
}
/// Parses the `#[xml(...)]` options attached to a struct.
///
/// The only recognised option is `root = b"..."`, whose value must be a byte
/// string literal. Structs without an `#[xml(...)]` attribute yield the
/// default (no root).
///
/// # Panics
///
/// Panics on bare flags, list-style options, unknown option names, and on a
/// `root` value that is not a byte string literal.
pub fn parse_struct_attrs(attrs: &[Attribute]) -> StructAttrs {
    let mut struct_attrs = StructAttrs::default();
    for meta in get_scoped_attrs(attrs).into_iter().flatten() {
        match meta {
            // No single-flag options exist for structs.
            Meta::Path(_) => panic!("invalid path attribute"),
            Meta::List(list) => {
                panic!("list-type attrs not supported: {}", list.to_token_stream());
            }
            Meta::NameValue(name_value) => {
                if !name_value.path.is_ident("root") {
                    panic!(
                        "unrecognized struct attribute: {}",
                        name_value.to_token_stream()
                    );
                }
                match name_value.value {
                    Expr::Lit(ExprLit {
                        lit: Lit::ByteStr(root),
                        ..
                    }) => {
                        struct_attrs.root = Some(root);
                    }
                    // Previously a wrong literal kind (e.g. `root = "name"`) was
                    // dropped silently; report it instead.
                    other => panic!(
                        "root attribute must be a byte string literal (e.g. b\"name\"), got: {}",
                        other.to_token_stream()
                    ),
                }
            }
        }
    }
    struct_attrs
}
/// Options read from the `#[xml(...)]` attribute of a struct field.
#[derive(Default)]
pub struct FieldAttrs {
/// Value of `rename = "..."`, if present.
pub rename: Option<String>,
/// Value of `ns = "..."`, if present.
pub ns: Option<String>,
/// Set by the bare flag `text`.
pub text: bool,
/// Set by the bare flag `untagged`.
pub untagged: bool,
/// Set by the bare flag `flatten`.
pub flatten: bool,
/// Path parsed from the string given as `default = "..."`, if present.
pub default: Option<syn::ExprPath>,
}
pub fn parse_field_attrs(attrs: &[Attribute]) -> FieldAttrs {
let mut field_attrs = FieldAttrs::default();
let attrs = get_scoped_attrs(attrs);
let attrs = if let Some(attrs) = attrs {
attrs
} else {
return field_attrs;
};
for meta in attrs {
match meta {
// single flag
Meta::Path(path) => {
if path.is_ident("text") {
field_attrs.text = true;
}
if path.is_ident("untagged") {
field_attrs.untagged = true;
}
if path.is_ident("flatten") {
field_attrs.flatten = true;
}
}
Meta::List(list) => {
panic!("list-type attrs not supported: {}", list.to_token_stream());
}
Meta::NameValue(name_value) => {
if name_value.path.is_ident("ns") {
if let Expr::Lit(ExprLit {
lit: Lit::Str(lit_str),
..
}) = name_value.value
{
field_attrs.ns = Some(lit_str.value());
}
} else if name_value.path.is_ident("rename") {
if let Expr::Lit(ExprLit {
lit: Lit::Str(lit_str),
..
}) = name_value.value
{
field_attrs.rename = Some(lit_str.value());
} else {
panic!("invalid rename attribute");
}
} else if name_value.path.is_ident("default") {
if let Expr::Lit(ExprLit {
lit: Lit::Str(lit_str),
..
}) = name_value.value
{
let a: syn::ExprPath = syn::parse_str(&lit_str.value())
.expect("could not parse default attribute as expression");
field_attrs.default = Some(a);
} else {
panic!("invalid default attribute");
}
} else {
panic!(
"unrecognized field attribute: {}",
name_value.to_token_stream()
);
}
}
}
}
field_attrs
}

View File

@@ -0,0 +1,74 @@
use crate::de::attrs::parse_variant_attrs;
use proc_macro2::Span;
use quote::quote;
use syn::{DataEnum, DeriveInput, Fields, FieldsUnnamed, Variant};
/// Generates the body of the match arm that deserializes one enum variant.
///
/// * Tuple variants must hold exactly one element; its type is deserialized
///   through `::rustical_xml::XmlDeserialize` and wrapped in the variant.
/// * Unit variants deserialize a `::rustical_xml::Unit` so that the element's
///   content is still consumed, then yield the variant.
///
/// The generated code expects `reader`, `start` and `empty` to be in scope.
///
/// # Panics
///
/// Panics for struct-like variants and for tuple variants whose element count
/// is not exactly one.
pub fn enum_variant_branch(variant: &Variant) -> proc_macro2::TokenStream {
    let ident = &variant.ident;
    match &variant.fields {
        Fields::Named(_) => {
            panic!("struct variants are not supported, please use a tuple variant with a struct")
        }
        Fields::Unnamed(FieldsUnnamed { unnamed, .. }) => {
            let ty = match unnamed.first() {
                Some(field) if unnamed.len() == 1 => &field.ty,
                _ => panic!("tuple variants should contain exactly one element"),
            };
            // Interpolate only the type (not the whole field, which would also
            // emit its attributes and visibility) and use a qualified path so
            // that generic types such as `Vec<T>` produce valid syntax.
            quote! {
                let val = <#ty as ::rustical_xml::XmlDeserialize>::deserialize(reader, start, empty)?;
                Ok(Self::#ident(val))
            }
        }
        Fields::Unit => {
            quote! {
                // Make sure that content is still consumed
                ::rustical_xml::Unit::deserialize(reader, start, empty)?;
                Ok(Self::#ident)
            }
        }
    }
}
/// Generates the `::rustical_xml::XmlDeserialize` implementation for an enum.
///
/// The generated `deserialize` resolves the name of the start element and
/// dispatches on it: each variant is matched by its `rename` option or, when
/// that is absent, by the variant identifier. Any other name produces
/// `XmlError::InvalidVariant`.
pub fn impl_de_enum(input: &DeriveInput, data: &DataEnum) -> proc_macro2::TokenStream {
    let name = &input.ident;
    let (impl_generics, type_generics, where_clause) = input.generics.split_for_impl();

    let mut match_arms = Vec::with_capacity(data.variants.len());
    for variant in &data.variants {
        let xml_name = match parse_variant_attrs(&variant.attrs).rename {
            Some(renamed) => renamed,
            None => variant.ident.to_string(),
        };
        let tag = syn::LitByteStr::new(xml_name.as_bytes(), Span::call_site());
        let body = enum_variant_branch(variant);
        match_arms.push(quote! {
            #tag => {
                #body
            }
        });
    }

    quote! {
        impl #impl_generics ::rustical_xml::XmlDeserialize for #name #type_generics #where_clause {
            fn deserialize<R: std::io::BufRead>(
                reader: &mut quick_xml::NsReader<R>,
                start: &quick_xml::events::BytesStart,
                empty: bool
            ) -> Result<Self, rustical_xml::XmlError> {
                use quick_xml::events::Event;

                let (_ns, name) = reader.resolve_element(start.name());

                match name.as_ref() {
                    #(#match_arms)*
                    name => {
                        // No variant carries this element name.
                        Err(rustical_xml::XmlError::InvalidVariant(String::from_utf8_lossy(name).to_string()))
                    }
                }
            }
        }
    }
}

View File

@@ -0,0 +1,278 @@
use super::attrs::{parse_field_attrs, parse_struct_attrs, FieldAttrs};
use core::panic;
use heck::ToKebabCase;
use proc_macro2::Span;
use quote::quote;
use syn::{AngleBracketedGenericArguments, DataStruct, DeriveInput, TypePath};
/// Returns the first generic type argument of `ty`'s last path segment.
///
/// For example `Vec<Foo>` yields `Foo`. Returns `None` when `ty` is not a
/// path type, when its last segment has no angle-bracketed arguments, or when
/// the first argument is not a type.
fn get_generic_type(ty: &syn::Type) -> Option<&syn::Type> {
    let path = match ty {
        syn::Type::Path(TypePath { path, .. }) => path,
        _ => return None,
    };
    let last_segment = path.segments.last()?;
    let args = match &last_segment.arguments {
        syn::PathArguments::AngleBracketed(AngleBracketedGenericArguments { args, .. }) => args,
        _ => return None,
    };
    match args.first() {
        Some(syn::GenericArgument::Type(inner)) => Some(inner),
        _ => None,
    }
}
/// A struct field paired with its parsed `#[xml(...)]` options.
pub struct Field {
/// The field as it appears in the derive input.
pub field: syn::Field,
/// Options parsed from the field's attributes.
pub attrs: FieldAttrs,
}
impl Field {
    /// Wraps `field`, parsing its `#[xml(...)]` options.
    fn from_syn_field(field: syn::Field) -> Self {
        Self {
            attrs: parse_field_attrs(&field.attrs),
            field,
        }
    }

    /// Element name this field is matched against: the `rename` option if
    /// given, otherwise the field identifier in kebab-case.
    fn de_name(&self) -> String {
        // Evaluate the fallback lazily so the identifier is only required
        // (and converted) when no explicit name was supplied.
        match &self.attrs.rename {
            Some(renamed) => renamed.clone(),
            None => self.field_ident().to_string().to_kebab_case(),
        }
    }

    /// Identifier of the field.
    ///
    /// # Panics
    ///
    /// Panics for unnamed (tuple struct) fields.
    fn field_ident(&self) -> &syn::Ident {
        self.field
            .ident
            .as_ref()
            .expect("tuple structs not supported")
    }

    /// Declared type of the field.
    fn ty(&self) -> &syn::Type {
        &self.field.ty
    }

    /// Declaration of this field inside the generated builder struct.
    ///
    /// * `default`: stored as the field type itself.
    /// * `flatten`: stored as a `Vec` of the field type's first generic argument.
    /// * otherwise: stored as `Option` of the field type.
    fn builder_field(&self) -> proc_macro2::TokenStream {
        let field_ident = self.field_ident();
        let ty = self.ty();
        if self.attrs.default.is_some() {
            quote! {
                #field_ident: #ty,
            }
        } else if self.attrs.flatten {
            let generic_type = get_generic_type(ty).expect("flatten attribute only implemented for explicit generics (rustical_xml will assume the first generic as the inner type)");
            quote! {
                #field_ident: Vec<#generic_type>,
            }
        } else {
            quote! {
                #field_ident: Option<#ty>,
            }
        }
    }

    /// Initial value of this field inside the generated builder struct,
    /// matching the storage chosen by `builder_field`.
    fn builder_field_init(&self) -> proc_macro2::TokenStream {
        let field_ident = self.field_ident();
        if let Some(default) = &self.attrs.default {
            quote! {
                #field_ident: #default(),
            }
        } else if self.attrs.flatten {
            quote! {
                #field_ident: vec![],
            }
        } else {
            quote! {
                #field_ident: None,
            }
        }
    }

    /// Expression moving this field from the builder into the final struct.
    ///
    /// Every variant ends with a comma because the results are concatenated
    /// without separators inside the struct literal. A field that was never
    /// set makes the generated code return `XmlError::Other` instead of
    /// panicking.
    fn builder_field_build(&self) -> proc_macro2::TokenStream {
        let field_ident = self.field_ident();
        if self.attrs.flatten {
            quote! {
                #field_ident: FromIterator::from_iter(builder.#field_ident.into_iter()),
            }
        } else if self.attrs.default.is_some() {
            quote! {
                #field_ident: builder.#field_ident,
            }
        } else {
            let missing = format!("missing field: {}", self.de_name());
            quote! {
                #field_ident: builder.#field_ident.ok_or_else(|| rustical_xml::XmlError::Other(#missing.to_owned()))?,
            }
        }
    }

    /// Match arm deserializing the child element named `de_name()` into this
    /// field, or `None` for `text` and `untagged` fields.
    fn named_branch(&self) -> Option<proc_macro2::TokenStream> {
        if self.attrs.text || self.attrs.untagged {
            return None;
        }
        let field_name = syn::LitByteStr::new(self.de_name().as_bytes(), Span::call_site());
        let field_ident = self.field_ident();
        let deserializer = self.ty();
        Some(if self.attrs.default.is_some() {
            quote! {
                #field_name => {
                    builder.#field_ident = <#deserializer as rustical_xml::XmlDeserialize>::deserialize(reader, &start, empty)?;
                },
            }
        } else if self.attrs.flatten {
            let deserializer = get_generic_type(self.ty()).expect("flatten attribute only implemented for explicit generics (rustical_xml will assume the first generic as the inner type)");
            quote! {
                #field_name => {
                    builder.#field_ident.push(<#deserializer as rustical_xml::XmlDeserialize>::deserialize(reader, &start, empty)?);
                },
            }
        } else {
            quote! {
                #field_name => {
                    builder.#field_ident = Some(<#deserializer as rustical_xml::XmlDeserialize>::deserialize(reader, &start, empty)?);
                },
            }
        })
    }

    /// Catch-all match arm deserializing any child element into this field,
    /// or `None` unless the field is `untagged`.
    fn untagged_branch(&self) -> Option<proc_macro2::TokenStream> {
        if !self.attrs.untagged {
            return None;
        }
        let field_ident = self.field_ident();
        let deserializer = self.ty();
        let value = quote! {
            <#deserializer as rustical_xml::XmlDeserialize>::deserialize(reader, &start, empty)?
        };
        // With `default` the builder stores the plain type, not an `Option`.
        let assigned = if self.attrs.default.is_some() {
            value
        } else {
            quote! { Some(#value) }
        };
        Some(quote! {
            _ => {
                builder.#field_ident = #assigned;
            },
        })
    }

    /// Statement storing the current text content (the `text` variable of the
    /// generated code) in this field, or `None` unless the field is `text`.
    fn text_branch(&self) -> Option<proc_macro2::TokenStream> {
        if !self.attrs.text {
            return None;
        }
        let field_ident = self.field_ident();
        // With `default` the builder stores the plain type, not an `Option`.
        Some(if self.attrs.default.is_some() {
            quote! {
                builder.#field_ident = text.to_owned().into();
            }
        } else {
            quote! {
                builder.#field_ident = Some(text.to_owned().into());
            }
        })
    }
}
/// Generates the `::rustical_xml::XmlDeserialize` implementation for a struct
/// and, when `#[xml(root = b"...")]` is present, an `::rustical_xml::XmlRoot`
/// implementation returning that byte string.
///
/// The generated `deserialize` fills a local builder struct while reading
/// events until the end tag matching `start`, then moves the collected values
/// into `Self`. Unless `empty` is set, events are handled as follows:
///
/// * child elements are dispatched by name to the named field branches, then
///   to the untagged branch (if any); unknown names yield `XmlError::UnknownError`,
/// * text is unescaped and handed to the `text` fields,
/// * comments are ignored,
/// * CDATA, declarations, processing instructions and doctypes yield
///   `XmlError::UnsupportedEvent`, a mismatched end tag yields `XmlError::Other`,
///   and end of input yields `XmlError::Eof`.
///
/// # Panics
///
/// Panics if more than one field is marked `untagged`, or if attribute parsing
/// of the struct or its fields panics.
pub fn impl_de_struct(input: &DeriveInput, data: &DataStruct) -> proc_macro2::TokenStream {
    let (impl_generics, type_generics, where_clause) = input.generics.split_for_impl();
    let name = &input.ident;
    let struct_attrs = parse_struct_attrs(&input.attrs);
    let fields: Vec<_> = data
        .fields
        .iter()
        .map(|field| Field::from_syn_field(field.to_owned()))
        .collect();
    let builder_fields = fields.iter().map(Field::builder_field);
    let builder_field_inits = fields.iter().map(Field::builder_field_init);
    let named_field_branches = fields.iter().filter_map(Field::named_branch);
    let untagged_field_branches: Vec<_> =
        fields.iter().filter_map(Field::untagged_branch).collect();
    // Each untagged field contributes a `_ =>` arm; a second one could never match.
    if untagged_field_branches.len() > 1 {
        panic!("Currently only one untagged field supported!");
    }
    let text_field_branches = fields.iter().filter_map(Field::text_branch);
    let builder_field_builds = fields.iter().map(Field::builder_field_build);
    let xml_root_impl = if let Some(root) = struct_attrs.root {
        quote! {
            impl #impl_generics ::rustical_xml::XmlRoot for #name #type_generics #where_clause {
                fn root_tag() -> &'static [u8] {
                    #root
                }
            }
        }
    } else {
        quote! {}
    };
    quote! {
        #xml_root_impl

        impl #impl_generics ::rustical_xml::XmlDeserialize for #name #type_generics #where_clause {
            fn deserialize<R: std::io::BufRead>(
                reader: &mut quick_xml::NsReader<R>,
                start: &quick_xml::events::BytesStart,
                empty: bool
            ) -> Result<Self, rustical_xml::XmlError> {
                use quick_xml::events::Event;
                use rustical_xml::XmlError;

                let mut buf = Vec::new();

                // initialise fields
                struct StructBuilder {
                    #(#builder_fields)*
                }

                let mut builder = StructBuilder {
                    #(#builder_field_inits)*
                };

                if !empty {
                    loop {
                        // Drop the bytes of the previous event so the buffer
                        // does not grow with the size of the element.
                        buf.clear();
                        let event = reader.read_event_into(&mut buf)?;
                        match &event {
                            Event::End(e) if e.name() == start.name() => {
                                break;
                            }
                            Event::Eof => return Err(XmlError::Eof),
                            // start of a child element
                            Event::Start(start) | Event::Empty(start) => {
                                let empty = matches!(event, Event::Empty(_));
                                let (_ns, name) = reader.resolve_element(start.name());
                                match name.as_ref() {
                                    #(#named_field_branches)*
                                    #(#untagged_field_branches)*
                                    _ => {
                                        // invalid field name
                                        return Err(XmlError::UnknownError)
                                    }
                                }
                            }
                            Event::Text(bytes_text) => {
                                let text = bytes_text.unescape()?;
                                #(#text_field_branches)*
                            }
                            Event::CData(_) => {
                                return Err(XmlError::UnsupportedEvent("CDATA"));
                            }
                            Event::Comment(_) => {
                                // ignore
                            }
                            Event::Decl(_) => {
                                // Error: not supported
                                return Err(XmlError::UnsupportedEvent("Declaration"));
                            }
                            Event::PI(_) => {
                                // Error: not supported
                                return Err(XmlError::UnsupportedEvent("Processing instruction"));
                            }
                            Event::DocType(_) => {
                                // Error: start of new document
                                return Err(XmlError::UnsupportedEvent("Doctype in the middle of the document"));
                            }
                            Event::End(_) => {
                                // Error: premature end
                                return Err(XmlError::Other("Unexpected closing tag for wrong element".to_owned()));
                            }
                        }
                    }
                }

                Ok(Self {
                    #(#builder_field_builds)*
                })
            }
        }
    }
}

View File

@@ -0,0 +1,6 @@
// Parsing of the `#[xml(...)]` helper attributes.
pub mod attrs;
// Code generation for enums and structs; only the entry points are re-exported below.
mod de_enum;
mod de_struct;
pub use de_enum::impl_de_enum;
pub use de_struct::impl_de_struct;

View File

@@ -0,0 +1,18 @@
use core::panic;
use syn::{parse_macro_input, DeriveInput};
mod de;
use de::{impl_de_enum, impl_de_struct};
/// Derive macro `XmlDeserialize`, accepting the `#[xml(...)]` helper attribute.
///
/// Structs are expanded by `impl_de_struct` and enums by `impl_de_enum`.
///
/// # Panics
///
/// Panics when applied to a union.
#[proc_macro_derive(XmlDeserialize, attributes(xml))]
pub fn derive_xml_deserialize(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    let derive_input = parse_macro_input!(input as DeriveInput);
    let expanded = match &derive_input.data {
        syn::Data::Struct(data) => impl_de_struct(&derive_input, data),
        syn::Data::Enum(data) => impl_de_enum(&derive_input, data),
        syn::Data::Union(_) => panic!("Union not supported"),
    };
    proc_macro::TokenStream::from(expanded)
}