<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<!-- Channel-level metadata for the "PDFs" tag archive feed. -->
	<title>PDFs Archives - LIEFFIE</title>
	<!-- rel="self" link carrying this feed's own URL. -->
	<atom:link href="https://lieffie.com/tag/pdfs/feed/" rel="self" type="application/rss+xml" />
	<link>https://lieffie.com/tag/pdfs/</link>
	<description>Digital News and Development</description>
	<lastBuildDate>Tue, 11 Mar 2025 11:15:36 +0000</lastBuildDate>
	<language>en-US</language>
	<!-- Syndication-module update hints. Each value sits on a single line so that
	     no indentation whitespace leaks into the element text. -->
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.7.2</generator>

	<!-- Channel image (32x32); its title and link repeat the channel's own. -->
	<image>
		<url>https://lieffie.com/wp-content/uploads/2025/02/cropped-to-help-ais-understand-the-world-researchers-put-them-in-a-robot-32x32.jpg</url>
		<title>PDFs Archives - LIEFFIE</title>
		<link>https://lieffie.com/tag/pdfs/</link>
		<width>32</width>
		<height>32</height>
	</image>
	<!-- Feed entry for a single post; one <category> element per assigned term. -->
	<item>
		<title>Why extracting data from PDFs is still a nightmare for data experts</title>
		<link>https://lieffie.com/digital-news/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/</link>
		<dc:creator>wiredgorilla</dc:creator>
		<pubDate>Tue, 11 Mar 2025 11:15:36 +0000</pubDate>
		<category>Digital News</category>
		<category>acquisition</category>
		<category>AI</category>
		<category>ars</category>
		<category>Biz &amp; IT</category>
		<category>ChatGPT</category>
		<category>chatgtp</category>
		<category>Computational Journalism</category>
		<category>Derek Willis</category>
		<category>Google</category>
		<category>large language models</category>
		<category>machine learning</category>
		<category>Mistral</category>
		<category>Mistral OCR</category>
		<category>ocr</category>
		<category>optical character recognition</category>
		<category>PDFs</category>
		<category>Ray Kurzweil</category>
		<category>Simon Willison</category>
		<category>Strategy</category>
		<category>Tech</category>
		<category>technology</category>
		<category>trust</category>
		<!-- The guid value matches the item <link> above but is flagged isPermaLink="false". -->
		<guid isPermaLink="false">https://lieffie.com/digital-news/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/</guid>
		<!-- HTML summary; kept inside CDATA because it embeds markup. -->
		<description><![CDATA[<p>&#8220;The biggest [drawback] is that they are probabilistic prediction machines and will get it wrong in ways that aren&#8217;t just &#8216;that&#8217;s the wrong...</p>
<p>The post <a href="https://lieffie.com/digital-news/why-extracting-data-from-pdfs-is-still-a-nightmare-for-data-experts/">Why extracting data from PDFs is still a nightmare for data experts</a> appeared first on <a href="https://lieffie.com">LIEFFIE</a>.</p>
]]></description>
	</item>
	</channel>
</rss>
