For parsing mixed text and HTML, I am using Nokogiri. Below is some sample code, which tokenizes the markup with HTML::Tokenizer.
# Sample markup: a single <div> carrying the two attributes inspected at the end.
my_html = '<div class="content doublespace" id="main-content">this is some html</div>'
tokens = HTML::Tokenizer.new(my_html)
tags = []
while (token = tokens.next)
  node = HTML::Node.parse(nil, 0, 0, token, false)
  # Keep only nodes that report tag? and are not closing tags.
  tags << node if node.tag? && node.closing != :close
end
tags.first.name # => "div"
tags.first.attributes # => {"class"=>"content doublespace", "id"=>"main-content"}
The code above is taken from
Ruby on Rails Gotchas. I wrote the code below, and I will explain how it works.
First, I defined a method named html_parse that takes parameters. Then I initialized all the variables that I need. I declared them at the top of the method so that I can access them throughout the whole method.
# Working state for the parse: the response hash, the collected model entries,
# the parsed nodes, the message text and the pending user id.
json_content = {}
collect_models, tags = [], []
msg, user_id = "", ""
Next, let's tokenize the HTML string, parse each token into a node, and append it to the tags array.
# Tokenize the markup, then turn each raw token into a node and store it in tags.
tokens = HTML::Tokenizer.new(str_html)
while (token = tokens.next)
  tags.push(HTML::Node.parse(nil, 0, 0, token, false))
end
Then, loop over the tags array.
# Visit every parsed node by index. The ":" lines are placeholders for the
# loop body that is described below.
(0..(tags.size - 1).to_i).each do |i|
:
:
:
:
:
end
Inside the loop there are a couple of conditions that determine what each array element contains: whether it is a TEXT node or a TAG node.
node = tags[i]

if node.instance_of?(HTML::Text)
  # Text is always appended to the message; when a user_id is pending, the
  # same text is also recorded as that user's name and the pending id is cleared.
  text = node.content
  msg << text
  unless user_id.blank?
    collect_models << { name: "{fa-child #ffffff}#{text}", id: user_id, type: 'User', value: text }
    user_id = ""
  end
elsif node.instance_of?(HTML::Tag) && !node.attributes.nil?
  attrs = node.attributes

  # A "user_id" attribute is remembered until the next text node is seen.
  unless attrs["user_id"].nil?
    user_id = attrs["user_id"]
    msg << "{fa-child #ffffff}"
  end

  # A "model_id" attribute is resolved immediately through Model.find.
  unless attrs["model_id"].nil?
    model_id = attrs["model_id"]
    model = Model.find(model_id) # local variable: no state is kept between iterations
    dependent = model.dependent_model
    model_name = model.try(:name)
    icon = dependent.try(:icon)
    model_type_name = dependent.try(:name)
    msg << "{#{icon} #fff} "
    collect_models << { name: "{#{icon} #000} #{model_name}", id: model_id, type: model_type_name, value: model_name }
  end
end
Then, at the end, it returns the whole response as a hash, ready to be sent as a JSON response.
# Bundle the message text and the collected model entries into the response hash.
json_content = { message: msg, models: collect_models }
Here is the full code.
# Converts an HTML fragment into a message string plus the list of records
# that the fragment references.
#
# The fragment is tokenized with HTML::Tokenizer and each token is turned into
# a node with HTML::Node.parse. Text nodes are appended to the message as-is.
# A tag with a "user_id" attribute adds a user icon placeholder to the message
# and marks the next text node as that user's name. A tag with a "model_id"
# attribute is looked up with Model.find and adds an icon placeholder built
# from the record's dependent_model.
#
# @param str_html [String] the markup to parse
# @param app_url [Object] not used by this method; kept so existing callers still work
# @return [Hash] { message: String, models: Array<Hash> }, where every entry
#   of :models has the keys :name, :id, :type and :value
# @raise whatever Model.find raises when a "model_id" cannot be resolved
def self.html_parse(str_html, app_url)
  collect_models = []
  msg = ""
  user_id = ""

  tokens = HTML::Tokenizer.new(str_html)
  while (token = tokens.next)
    node = HTML::Node.parse(nil, 0, 0, token, false)

    # Exact class checks on purpose: subclasses of Text/Tag are ignored.
    if node.instance_of?(HTML::Text)
      text = node.content
      msg << text
      # A pending user_id means this text is the user's display name.
      unless user_id.blank?
        collect_models << { name: "{fa-child #ffffff}#{text}", id: user_id, type: 'User', value: text }
        user_id = ""
      end
    elsif node.instance_of?(HTML::Tag) && !node.attributes.nil?
      attrs = node.attributes

      unless attrs["user_id"].nil?
        # Remember the id until the following text node supplies the name.
        user_id = attrs["user_id"]
        msg << "{fa-child #ffffff}"
      end

      unless attrs["model_id"].nil?
        model_id = attrs["model_id"]
        # Local variable instead of a class-level @model, so nothing leaks between calls.
        model = Model.find(model_id)
        dependent = model.dependent_model
        model_name = model.try(:name)
        icon = dependent.try(:icon)
        model_type_name = dependent.try(:name)
        msg << "{#{icon} #fff} "
        collect_models << { name: "{#{icon} #000} #{model_name}", id: model_id, type: model_type_name, value: model_name }
      end
    end
  end

  { message: msg, models: collect_models }
end
No comments:
Post a Comment